From 39b4b65ffdfae7bc169c329f6a1608040e24dd92 Mon Sep 17 00:00:00 2001 From: Bartosz Lesniewski Date: Thu, 29 Apr 2021 05:58:03 +0200 Subject: [PATCH 01/73] Fix Max and Average Pooling op behavior with ceil rounding (#5204) * Fix Max and Avg Pooling behavior with ceil rounding * Remove redundant code * Add backend test to check the problematic case --- .../single_layer_tests/pooling.cpp | 6 +- .../skip_tests_config.cpp | 2 - .../ngraph/runtime/reference/avg_pool.hpp | 27 ++++++++- .../ngraph/runtime/reference/max_pool.hpp | 8 ++- ngraph/test/backend/avg_pool.in.cpp | 55 +++++++++++++++++++ ngraph/test/backend/max_pool.in.cpp | 25 +++++++++ 6 files changed, 117 insertions(+), 6 deletions(-) diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pooling.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pooling.cpp index 98fa2a9ff84..0e71de6d4c3 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pooling.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pooling.cpp @@ -26,8 +26,10 @@ const std::vector> kernels = {{3, 3}, {3, 5}}; const std::vector> kernel3D = {{2, 2, 2}}; -const std::vector> strides = {{1, 1}, - {1, 2}}; +const std::vector> strides = {{1, 1}, + {1, 2}, + {2, 1}, + {2, 2}}; const std::vector> strides3D = {{1, 1, 1}, {2, 2, 2}}; const std::vector> stridess3D = {{2, 2, 2}}; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 657232fb116..ef6daefea09 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -10,8 +10,6 @@ std::vector disabledTestPatterns() { std::vector retVector{ - // TODO: Issue 26264 - R"(.*(MaxPool|AvgPool).*S\(1\.2\).*Rounding=ceil.*)", // TODO: Issue 31841 R"(.*(QuantGroupConvBackpropData3D).*)", // TODO: Issue 31843 diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/avg_pool.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/avg_pool.hpp index 49a008476f8..e5c1be08788 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/avg_pool.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/avg_pool.hpp @@ -177,6 +177,13 @@ namespace ngraph input_batch_transform_start[i] + window_shape_this_dim; input_batch_transform_padding_below[i] = padding_below[i - 2]; input_batch_transform_padding_above[i] = padding_above[i - 2]; + // If a window (kernel) is out of arg shape bounds, trim it to fit + auto padded_upper_bound = + arg_shape[i] + padding_below[i - 2] + padding_above[i - 2]; + if (input_batch_transform_end[i] > padded_upper_bound) + { + input_batch_transform_end[i] = padded_upper_bound; + } } for (size_t i = 0; i < arg_shape.size(); i++) @@ -204,6 +211,20 @@ namespace ngraph T result = 0; size_t n_elements = 0; + // The below conditions are to provide conformance between the ref and plugins: + // If exclude_padding is disabled (include_padding... enabled), then: + // The size of window doesn't change even if the window was clipped to fit the + // input, number of elements will be equal to window_size.width * + // window_size.height. The exception from this rule is if padding is not + // present, then window size is calculated each time. 
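+                    // Illustrative example (mirrors the avg_pool_2d_ceil_stride_pad backend tests
+                    // added later in this patch): input {1, 2, 3, 4, 5}, pads_begin = pads_end = {1, 1},
+                    // kernel {3, 3}, strides {1, 2}, ceil rounding:
+                    //  - exclude_pad = true:  each window sum (3, 9, 9) is divided by the number of
+                    //    in-bounds elements (2, 3, 2) -> results {1.5, 3, 4.5}
+                    //  - exclude_pad = false: every window is divided by the full kernel size (9),
+                    //    e.g. input {2.5, 2, 12, 4, 5} gives sums {4.5, 18, 9} -> results {0.5, 2, 1}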
+ + auto padding_present = padding_below[0] != 0 || padding_below[1] != 0 || + padding_above[0] != 0 || padding_above[1] != 0; + + if (include_padding_in_avg_computation && padding_present) + { + n_elements = shape_size(window_shape); + } for (const Coordinate& input_batch_coord : input_batch_transform) { bool in_bounds = @@ -214,7 +235,11 @@ namespace ngraph T v = in_bounds ? arg[input_batch_transform.index(input_batch_coord)] : static_cast(0); result += v; - n_elements++; + if (!padding_present || + (in_bounds && !include_padding_in_avg_computation)) + { + n_elements++; + } } } diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/max_pool.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/max_pool.hpp index f6c21b15966..885c2115756 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/max_pool.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/max_pool.hpp @@ -6,7 +6,6 @@ #include #include - #include "ngraph/coordinate_transform.hpp" namespace ngraph @@ -78,6 +77,13 @@ namespace ngraph input_batch_transform_start[i] = movement_stride * out_coord[i]; input_batch_transform_end[i] = input_batch_transform_start[i] + window_shape_this_dim; + // If a window (kernel) is out of arg shape bounds, trim it to fit + auto padded_upper_bound = + arg_shape[i] + padding_below[i - 2] + padding_above[i - 2]; + if (input_batch_transform_end[i] > padded_upper_bound) + { + input_batch_transform_end[i] = padded_upper_bound; + } input_batch_transform_padding_below[i] = padding_below[i - 2]; input_batch_transform_padding_above[i] = padding_above[i - 2]; } diff --git a/ngraph/test/backend/avg_pool.in.cpp b/ngraph/test/backend/avg_pool.in.cpp index a67b4cdb2c9..3acaafde702 100644 --- a/ngraph/test/backend/avg_pool.in.cpp +++ b/ngraph/test/backend/avg_pool.in.cpp @@ -102,6 +102,61 @@ NGRAPH_TEST(${BACKEND_NAME}, avg_pool_2d_pad) test_case.run(); } +NGRAPH_TEST(${BACKEND_NAME}, avg_pool_2d_ceil_stride_pad) +{ + Shape in_shape{1, 1, 1, 5}; + Shape out_shape{1, 1, 1, 3}; + const Strides& strides{1, 2}; + const Shape& pads_begin{1, 1}; + const Shape& pads_end{1, 1}; + const Shape& kernel{3, 3}; + const bool exclude_pad = true; + const op::RoundingType rounding_type = op::RoundingType::CEIL; + const op::PadType pad_type = op::PadType::EXPLICIT; + + auto A = make_shared(element::f32, in_shape); + auto avgPool = make_shared( + A, strides, pads_begin, pads_end, kernel, exclude_pad, rounding_type, pad_type); + auto f = make_shared(avgPool, ParameterVector{A}); + + + std::vector a{1, 2, 3, 4, 5}; + std::vector result{1.5, 3, 4.5}; + + auto test_case = test::TestCase(f); + test_case.add_input({a}); + test_case.add_expected_output(out_shape, result); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, avg_pool_2d_ceil_stride_pad_include_padding) +{ + Shape in_shape{1, 1, 1, 5}; + Shape out_shape{1, 1, 1, 3}; + const Strides& strides{1, 2}; + const Shape& pads_begin{1, 1}; + const Shape& pads_end{1, 1}; + const Shape& kernel{3, 3}; + const bool exclude_pad = false; + const op::RoundingType rounding_type = op::RoundingType::CEIL; + const op::PadType pad_type = op::PadType::EXPLICIT; + + auto A = make_shared(element::f32, in_shape); + auto avgPool = make_shared( + A, strides, pads_begin, pads_end, kernel, exclude_pad, rounding_type, pad_type); + auto f = make_shared(avgPool, ParameterVector{A}); + + + std::vector a{2.5, 2, 12, 4, 5}; + std::vector result{0.5, 2, 1}; + + auto test_case = test::TestCase(f); + test_case.add_input({a}); + test_case.add_expected_output(out_shape, 
result); + test_case.run(); +} + + NGRAPH_TEST(${BACKEND_NAME}, avg_pool_2d_same_upper) { Shape in_shape{1, 1, 3, 3}; diff --git a/ngraph/test/backend/max_pool.in.cpp b/ngraph/test/backend/max_pool.in.cpp index 6260436e31a..8db45b79904 100644 --- a/ngraph/test/backend/max_pool.in.cpp +++ b/ngraph/test/backend/max_pool.in.cpp @@ -99,6 +99,31 @@ NGRAPH_TEST(${BACKEND_NAME}, max_pool_2d_pad) test_case.run(); } +NGRAPH_TEST(${BACKEND_NAME}, max_pool_2d_ceil_stride_pad) +{ + Shape in_shape{1, 1, 1, 5}; + Shape out_shape{1, 1, 1, 3}; + const Strides& strides{1, 2}; + const Shape& pads_begin{1, 1}; + const Shape& pads_end{1, 1}; + const Shape& kernel{3, 3}; + const op::RoundingType rounding_type = op::RoundingType::CEIL; + const op::PadType pad_type = op::PadType::EXPLICIT; + + auto A = make_shared(element::f32, in_shape); + auto maxPool = make_shared( + A, strides, pads_begin, pads_end, kernel, rounding_type, pad_type); + auto f = make_shared(maxPool, ParameterVector{A}); + + std::vector a{1, 2, 3, 4, 5}; + std::vector result{2, 4, 5}; + + auto test_case = test::TestCase(f); + test_case.add_input({a}); + test_case.add_expected_output(out_shape, result); + test_case.run(); +} + NGRAPH_TEST(${BACKEND_NAME}, max_pool_2d_same_upper) { Shape in_shape{1, 1, 3, 3}; From b3de2282066a410c0c72f8298cb47af9d828146a Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Thu, 29 Apr 2021 05:58:37 +0200 Subject: [PATCH 02/73] Transpose ngraph op and reference implementation revision (#5315) * Extend single layer Transpose tests * Add more Transpose backend tests * Update Transpose reference implementation * Use reference implementation in Transpose evaluate * Add type prop test with interval dimension * Move Transpose eval tests to separate file * Update ie unit test manifest for dynamic cases * Update evaluate default axes order calculation * Add Transpose to summarize script * Add serialize single layer tests * Add axes order checks in evaluate * Move ref function default args behaviour to legacy function * Add new line at EOF * Remove redundant transpose get_vector helper * Add set_element_type in transpose evaluate * Simplify vector convertion * Add deprecation notice * Add more tests * Tests update * Remove redundant reshape header * Update unique axes check --- .../serialization/single_layer/transpose.cpp | 71 ++++++ .../single_layer_tests/transpose.cpp | 76 ++++-- .../layer_tests_summary/utils/constants.py | 5 +- .../ngraph/runtime/reference/transpose.hpp | 79 +++--- ngraph/core/src/op/transpose.cpp | 59 ++--- ngraph/test/CMakeLists.txt | 1 + ngraph/test/backend/transpose.in.cpp | 122 ++++++++- ngraph/test/eval.cpp | 80 ------ ngraph/test/op_eval/transpose.cpp | 236 ++++++++++++++++++ ngraph/test/runtime/ie/unit_test.manifest | 4 +- ngraph/test/type_prop/transpose.cpp | 11 + 11 files changed, 570 insertions(+), 174 deletions(-) create mode 100644 inference-engine/tests/functional/inference_engine/serialization/single_layer/transpose.cpp create mode 100644 ngraph/test/op_eval/transpose.cpp diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/transpose.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/transpose.cpp new file mode 100644 index 00000000000..4fd458c1244 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/transpose.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include 
"shared_test_classes/single_layer/transpose.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +TEST_P(TransposeLayerTest, Serialize) { + Serialize(); +} + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, + InferenceEngine::Precision::I32 +}; + +std::vector> inputShape2D = {{2, 10}, {10, 2}, {10, 10}}; +std::vector> order2D = {{}, {0, 1}, {1, 0}}; + +INSTANTIATE_TEST_CASE_P(smoke_Transpose2D, TransposeLayerTest, + ::testing::Combine( + ::testing::ValuesIn(order2D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShape2D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + TransposeLayerTest::getTestCaseName); + +std::vector> inputShape4D = {{2, 2, 2, 2}}; +std::vector> order4D = { + {}, {0, 1, 2, 3} +}; + +INSTANTIATE_TEST_CASE_P(smoke_Transpose4D, TransposeLayerTest, + ::testing::Combine( + ::testing::ValuesIn(order4D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShape4D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + TransposeLayerTest::getTestCaseName); + +std::vector> inputShape5D = {{2, 3, 4, 5, 6}}; +std::vector> order5D = { + {}, {0, 1, 2, 3, 4} +}; + +INSTANTIATE_TEST_CASE_P(smoke_Transpose5D, TransposeLayerTest, + ::testing::Combine( + ::testing::ValuesIn(order5D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShape5D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + TransposeLayerTest::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/transpose.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/transpose.cpp index 5742abd9fee..ee508a38ca7 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/transpose.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/transpose.cpp @@ -10,36 +10,64 @@ using namespace LayerTestsDefinitions; namespace { - const std::vector netPrecisions = { InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, + InferenceEngine::Precision::I64, + InferenceEngine::Precision::I32, + InferenceEngine::Precision::I16 }; -const std::vector> inputShapes = { - std::vector{1, 3, 100, 100}, +std::vector> inputShape2D = {{2, 10}, {10, 2}, {10, 10}}; +std::vector> order2D = {{}, {0, 1}, {1, 0}}; + +INSTANTIATE_TEST_CASE_P(smoke_Transpose2D, TransposeLayerTest, + ::testing::Combine( + ::testing::ValuesIn(order2D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShape2D), + 
::testing::Values(CommonTestUtils::DEVICE_CPU)), + TransposeLayerTest::getTestCaseName); + +std::vector> inputShape4D = {{2, 2, 2, 2}, {1, 10, 2, 3}, {2, 3, 4, 5}}; +std::vector> order4D = { + {}, {0, 1, 2, 3}, {0, 1, 3, 2}, {0, 2, 1, 3}, {0, 2, 3, 1}, {0, 3, 1, 2}, {0, 3, 2, 1}, + {1, 0, 2, 3}, {1, 0, 3, 2}, {1, 2, 0, 3}, {1, 2, 3, 0}, {1, 3, 0, 2}, {1, 3, 2, 0}, + {2, 0, 1, 3}, {2, 0, 3, 1}, {2, 1, 0, 3}, {2, 1, 3, 0}, {2, 3, 0, 1}, {2, 3, 1, 0}, + {3, 0, 1, 2}, {3, 0, 2, 1}, {3, 1, 0, 2}, {3, 1, 2, 0}, {3, 2, 0, 1}, {3, 2, 1, 0} }; -const std::vector> inputOrder = { - std::vector{0, 3, 2, 1}, - std::vector{}, +INSTANTIATE_TEST_CASE_P(smoke_Transpose4D, TransposeLayerTest, + ::testing::Combine( + ::testing::ValuesIn(order4D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShape4D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + TransposeLayerTest::getTestCaseName); + +std::vector> inputShape5D = {{2, 2, 2, 2, 2}, {1, 10, 2, 3, 4}, {2, 3, 4, 5, 6}}; +std::vector> order5D = { + {}, {0, 1, 2, 3, 4}, {1, 0, 2, 3, 4}, {4, 3, 2, 1, 0}, {0, 2, 3, 4, 1}, + {1, 4, 2, 3, 0}, {2, 4, 1, 0, 3}, {3, 0, 2, 1, 4}, {4, 1, 0, 3, 2} }; -const auto params = testing::Combine( - testing::ValuesIn(inputOrder), - testing::ValuesIn(netPrecisions), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Precision::UNSPECIFIED), - testing::Values(InferenceEngine::Layout::ANY), - testing::Values(InferenceEngine::Layout::ANY), - testing::ValuesIn(inputShapes), - testing::Values(CommonTestUtils::DEVICE_CPU) -); - -INSTANTIATE_TEST_CASE_P( - smoke_Transpose, - TransposeLayerTest, - params, - TransposeLayerTest::getTestCaseName -); - +INSTANTIATE_TEST_CASE_P(smoke_Transpose5D, TransposeLayerTest, + ::testing::Combine( + ::testing::ValuesIn(order5D), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShape5D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + TransposeLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py index 5359efa3f99..6202322194d 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py @@ -80,5 +80,6 @@ VERIFIED_OP_REFERENCES = [ 'Swish-4', 'Tile-1', 'TopK-1', - 'TopK-3' -] \ No newline at end of file + 'TopK-3', + 'Transpose-1' +] diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/transpose.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/transpose.hpp index fe9a6b8a772..82dd4075546 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/transpose.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/transpose.hpp @@ -7,11 +7,9 @@ #include #include #include -#include #include -#include "ngraph/axis_vector.hpp" -#include "ngraph/coordinate_transform.hpp" +#include 
"ngraph/runtime/opt_kernel/reshape.hpp" #include "ngraph/shape.hpp" namespace ngraph @@ -20,41 +18,62 @@ namespace ngraph { namespace reference { - template - void transpose(const T* arg, T* out, Shape arg_size, const U* axes_order = nullptr) + void transpose(const char* data, + char* out, + const Shape& data_shape, + size_t element_size, + const int64_t* axes_order, + Shape out_shape) { - std::vector range_vector; + // To reuse opt_kernel::reshape axes order vector has to be converted to AxisVector + // Negative axes are not supported, it is validated by transpose evaluate method + std::vector axis_vector(axes_order, axes_order + data_shape.size()); + runtime::opt_kernel::reshape( + data, out, data_shape, axis_vector, out_shape, element_size); + } + + // Legacy function template to ensure backward compatibility + // Can be removed after ARM plugin start using evaluate or no template function + template + NGRAPH_DEPRECATED( + "Traspose function with template types is deprecated, use function with char* " + "args.") + void transpose(const T* arg, T* out, Shape arg_shape, const U* axes_order = nullptr) + { + std::vector converted_axes_order(arg_shape.size()); if (axes_order == nullptr) { - range_vector.resize(arg_size.size()); - std::iota(range_vector.begin(), range_vector.end(), 0); - std::reverse(range_vector.begin(), range_vector.end()); - axes_order = range_vector.data(); + std::iota(converted_axes_order.begin(), converted_axes_order.end(), 0); + std::reverse(converted_axes_order.begin(), converted_axes_order.end()); } - - std::vector input_strides(arg_size.size()); - std::vector output_strides(arg_size.size()); - input_strides.back() = 1; - output_strides.back() = 1; - - for (int i = input_strides.size() - 2; i >= 0; i--) + else { - input_strides[i] = input_strides[i + 1] * arg_size[i + 1]; - output_strides[i] = output_strides[i + 1] * arg_size[axes_order[i + 1]]; - } - for (int i = 0; i < shape_size(arg_size); ++i) - { - size_t in_position = 0; - size_t new_position = i; - - for (int j = 0; j < arg_size.size(); ++j) + for (size_t i = 0; i < converted_axes_order.size(); ++i) { - in_position += - (new_position / output_strides[j]) * input_strides[axes_order[j]]; - new_position %= output_strides[j]; + converted_axes_order[i] = static_cast(axes_order[i]); } - out[i] = arg[in_position]; } + Shape output_shape(arg_shape.size()); + std::transform( + converted_axes_order.begin(), + converted_axes_order.end(), + output_shape.begin(), + [&](const int64_t& v) { + NGRAPH_CHECK(v >= 0, + "Negative values for transpose axes order are not supported."); + NGRAPH_CHECK(v < int64_t(arg_shape.size()), + "Transpose axis ", + v, + " is out of shape range."); + return arg_shape[v]; + }); + + transpose(reinterpret_cast(arg), + reinterpret_cast(out), + arg_shape, + sizeof(T), + converted_axes_order.data(), + output_shape); } } // namespace reference } // namespace runtime diff --git a/ngraph/core/src/op/transpose.cpp b/ngraph/core/src/op/transpose.cpp index 6470efc2c15..8e8ff0c1181 100644 --- a/ngraph/core/src/op/transpose.cpp +++ b/ngraph/core/src/op/transpose.cpp @@ -6,7 +6,7 @@ #include "itt.hpp" #include "ngraph/op/transpose.hpp" -#include "ngraph/runtime/opt_kernel/reshape.hpp" +#include "ngraph/runtime/reference/transpose.hpp" using namespace std; using namespace ngraph; @@ -78,56 +78,45 @@ shared_ptr op::v1::Transpose::clone_with_new_inputs(const OutputVector& ne namespace transpose { - template - std::vector get_vector(const HostTensorPtr& arg) - { - std::vector rc; - auto p = arg->get_data_ptr(); - 
for (size_t i = 0; i < shape_size(arg->get_shape()); i++) - { - rc.push_back(p[i]); - } - return rc; - } - bool evaluate_transpose(const HostTensorPtr& arg1, const HostTensorPtr& arg2, const HostTensorPtr& out) { NGRAPH_CHECK(arg2->get_element_type().is_integral_number(), - "axis element type is not integral data type"); - - std::vector axis_order = host_tensor_2_vector(arg2); + "Transpose axis element type has to be integral data type."); + std::vector axes_order = host_tensor_2_vector(arg2); Shape in_shape = arg1->get_shape(); - AxisVector in_axis_order(shape_size(arg2->get_shape())); - if (in_axis_order.empty()) + if (shape_size(arg2->get_shape()) == 0) { - size_t rank = in_shape.size(); - for (size_t i = 1; i <= rank; ++i) - in_axis_order.emplace_back(rank - i); + axes_order.resize(in_shape.size()); + std::iota(axes_order.begin(), axes_order.end(), 0); + std::reverse(axes_order.begin(), axes_order.end()); } else { - std::transform(axis_order.begin(), - axis_order.end(), - in_axis_order.begin(), - [&](const int64_t& v) { return (v > 0) ? v : 0; }); + std::unordered_set axes_set(axes_order.begin(), axes_order.end()); + bool is_unique_order = axes_set.size() == axes_order.size(); + NGRAPH_CHECK(is_unique_order, "Transpose axes order values must be unique."); } Shape out_shape(in_shape.size()); - std::transform(in_axis_order.begin(), - in_axis_order.end(), - out_shape.begin(), - [&](const int64_t& v) { return in_shape[v]; }); + std::transform( + axes_order.begin(), axes_order.end(), out_shape.begin(), [&](const int64_t& v) { + NGRAPH_CHECK(v >= 0, "Negative values for transpose axes order are not supported."); + NGRAPH_CHECK( + v < int64_t(in_shape.size()), "Transpose axis ", v, " is out of shape range."); + return in_shape[v]; + }); out->set_shape(out_shape); - runtime::opt_kernel::reshape(arg1->get_data_ptr(), - out->get_data_ptr(), - arg1->get_shape(), - in_axis_order, - out->get_shape(), - arg1->get_element_type().size()); + out->set_element_type(arg1->get_element_type()); + runtime::reference::transpose(arg1->get_data_ptr(), + out->get_data_ptr(), + arg1->get_shape(), + arg1->get_element_type().size(), + axes_order.data(), + out_shape); return true; } } // namespace transpose diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 56364055638..6d90dbc3f9c 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -78,6 +78,7 @@ set(SRC op_eval/split.cpp op_eval/swish.cpp op_eval/strided_slice.cpp + op_eval/transpose.cpp op_eval/variadic_split.cpp op_is.cpp opset1.cpp diff --git a/ngraph/test/backend/transpose.in.cpp b/ngraph/test/backend/transpose.in.cpp index 1ec0ad5e895..cf673ac7ace 100644 --- a/ngraph/test/backend/transpose.in.cpp +++ b/ngraph/test/backend/transpose.in.cpp @@ -7,6 +7,8 @@ #include "ngraph/runtime/tensor.hpp" #include "runtime/backend.hpp" #include "util/all_close_f.hpp" +#include "util/engine/test_engines.hpp" +#include "util/test_case.hpp" #include "util/test_control.hpp" #include "util/test_tools.hpp" @@ -14,8 +16,9 @@ using namespace std; using namespace ngraph; static string s_manifest = "${MANIFEST}"; +using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); -NGRAPH_TEST(${BACKEND_NAME}, transpose) +NGRAPH_TEST(${BACKEND_NAME}, transpose_basic) { // // Create a graph for f(x,perm) = Transpose(x,Convert(perm)). 
We'll do the permutation in @@ -61,3 +64,120 @@ NGRAPH_TEST(${BACKEND_NAME}, transpose) ASSERT_TRUE(test::all_close_f(results, expected_results[i], MIN_FLOAT_TOLERANCE_BITS)); } } + +NGRAPH_TEST(${BACKEND_NAME}, transpose_axes_constant) +{ + const auto data_shape = Shape{2, 1, 3, 4}; + const auto axes_shape = Shape{4}; + const auto output_shape = Shape{3, 4, 2, 1}; + + auto data_param = make_shared(element::f32, data_shape); + auto axes_const = op::Constant::create(element::i64, axes_shape, {2, 3, 0, 1}); + auto transpose = make_shared(data_param, axes_const); + auto function = make_shared(NodeVector{transpose}, ParameterVector{data_param}); + + std::vector data(shape_size(data_shape)); + std::iota(data.begin(), data.end(), 1); + std::vector expected_result{ 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, + 21, 10, 22, 11, 23, 12, 24}; + + auto test_case = test::TestCase(function); + test_case.add_input(data_shape, data); + test_case.add_expected_output(output_shape, expected_result); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, transpose_axes_empty_constant) +{ + const auto data_shape = Shape{2, 1, 3, 4}; + const auto axes_shape = Shape{0}; + const auto output_shape = Shape{4, 3, 1, 2}; + + auto data_param = make_shared(element::f32, data_shape); + auto axes_const = op::Constant::create(element::i64, axes_shape, std::vector{}); + auto transpose = make_shared(data_param, axes_const); + auto function = make_shared(NodeVector{transpose}, ParameterVector{data_param}); + + std::vector data(shape_size(data_shape)); + std::iota(data.begin(), data.end(), 1); + std::vector expected_result{ 1, 13, 5, 17, 9, 21, 2, 14, 6, 18, 10, 22, 3, 15, 7, 19, 11, + 23, 4, 16, 8, 20, 12, 24}; + auto test_case = test::TestCase(function); + test_case.add_input(data_shape, data); + test_case.add_expected_output(output_shape, expected_result); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, transpose_axes_parameter_static_shapes) +{ + const auto data_shape = Shape{2, 1, 3, 4}; + const auto axes_shape = Shape{4}; + const auto output_shape = Shape{3, 4, 2, 1}; + + auto data_param = make_shared(element::f32, data_shape); + auto axes_param = make_shared(element::i32, axes_shape); + + auto transpose = make_shared(data_param, axes_param); + auto function = make_shared(NodeVector{transpose}, ParameterVector{data_param, axes_param}); + + std::vector data(shape_size(data_shape)); + std::iota(data.begin(), data.end(), 1); + + std::vector axes{2, 3, 0, 1}; + std::vector expected_result{ 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, + 21, 10, 22, 11, 23, 12, 24}; + + auto test_case = test::TestCase(function); + test_case.add_input(data_shape, data); + test_case.add_input(axes_shape, axes); + test_case.add_expected_output(output_shape, expected_result); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, transpose_axes_parameter_dynamic_shapes) +{ + const auto data_shape = Shape{2, 1, 3, 4}; + const auto axes_shape = Shape{4}; + const auto output_shape = Shape{3, 4, 2, 1}; + + auto data_param = make_shared(element::f32, PartialShape::dynamic()); + auto axes_param = make_shared(element::i32, PartialShape{Dimension::dynamic()}); + + auto transpose = make_shared(data_param, axes_param); + auto function = make_shared(NodeVector{transpose}, ParameterVector{data_param, axes_param}); + + std::vector data(shape_size(data_shape)); + std::iota(data.begin(), data.end(), 1); + + std::vector axes{2, 3, 0, 1}; + std::vector expected_result{ 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, + 
21, 10, 22, 11, 23, 12, 24}; + + auto test_case = test::TestCase(function); + test_case.add_input(data_shape, data); + test_case.add_input(axes_shape, axes); + test_case.add_expected_output(output_shape, expected_result); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, transpose_int_data_axes_constant) +{ + const auto data_shape = Shape{2, 1, 3, 4}; + const auto axes_shape = Shape{4}; + const auto output_shape = Shape{3, 4, 2, 1}; + + auto data_param = make_shared(element::i32, data_shape); + auto axes_const = op::Constant::create(element::i64, axes_shape, {2, 3, 0, 1}); + auto transpose = make_shared(data_param, axes_const); + auto function = make_shared(NodeVector{transpose}, ParameterVector{data_param}); + + std::vector data(shape_size(data_shape)); + std::iota(data.begin(), data.end(), 1); + std::vector expected_result{ 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, + 21, 10, 22, 11, 23, 12, 24}; + + auto test_case = test::TestCase(function); + test_case.add_input(data_shape, data); + test_case.add_expected_output(output_shape, expected_result); + test_case.run(); +} diff --git a/ngraph/test/eval.cpp b/ngraph/test/eval.cpp index e37a2cad95b..f5602023c5c 100644 --- a/ngraph/test/eval.cpp +++ b/ngraph/test/eval.cpp @@ -53,7 +53,6 @@ #include "ngraph/op/tan.hpp" #include "ngraph/op/tanh.hpp" #include "ngraph/op/topk.hpp" -#include "ngraph/op/transpose.hpp" #include "ngraph/op/unsqueeze.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/validation_util.hpp" @@ -1296,85 +1295,6 @@ TEST(eval, evaluate_dynamic_concat) ASSERT_EQ(cval, out); } -template -void test_eval(shared_ptr fun, - vector>& inputs, - vector& x_shapes, - vector& result_shapes, - vector>& results) -{ - using IN_T = typename element_type_traits::value_type; - std::vector> perms{{0, 1}, {1, 0}, {2, 1, 0}}; - for (size_t i = 0; i < x_shapes.size(); i++) - { - auto result_tensor = make_shared(); - ASSERT_TRUE(fun->evaluate({result_tensor}, - {make_host_tensor(x_shapes[i], inputs[i]), - make_host_tensor(Shape{perms[i].size()}, perms[i])})); - - ASSERT_EQ(result_tensor->get_shape(), result_shapes[i]); - auto actual_results = read_vector(result_tensor); - ASSERT_EQ(actual_results, results[i]); - } -} - -TEST(eval, eval_transpose) -{ - auto x = make_shared(element::f32, PartialShape::dynamic()); - vector> axes; - axes.push_back(make_shared(element::i8, PartialShape{Dimension::dynamic()})); - axes.push_back(make_shared(element::i16, PartialShape{Dimension::dynamic()})); - axes.push_back(make_shared(element::i32, PartialShape{Dimension::dynamic()})); - axes.push_back(make_shared(element::i64, PartialShape{Dimension::dynamic()})); - - axes.push_back(make_shared(element::u8, PartialShape{Dimension::dynamic()})); - axes.push_back(make_shared(element::u16, PartialShape{Dimension::dynamic()})); - axes.push_back(make_shared(element::u32, PartialShape{Dimension::dynamic()})); - axes.push_back(make_shared(element::u64, PartialShape{Dimension::dynamic()})); - - std::vector x_shapes{Shape{2, 3}, Shape{2, 3}, Shape{2, 2, 3}}; - - std::vector> inputs{ - {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}; - std::vector result_shapes{Shape{2, 3}, Shape{3, 2}, {3, 2, 2}}; - std::vector> results{ - {1, 2, 3, 4, 5, 6}, {1, 4, 2, 5, 3, 6}, {1, 7, 4, 10, 2, 8, 5, 11, 3, 9, 6, 12}}; - - for (auto& axis : axes) - { - auto x_transpose = make_shared(x, axis); - auto fun = make_shared(NodeVector{x_transpose}, ParameterVector{x, axis}); - - switch (axis->get_element_type()) - { - case 
element::Type_t::i8: - test_eval(fun, inputs, x_shapes, result_shapes, results); - break; - case element::Type_t::i16: - test_eval(fun, inputs, x_shapes, result_shapes, results); - break; - case element::Type_t::i32: - test_eval(fun, inputs, x_shapes, result_shapes, results); - break; - case element::Type_t::i64: - test_eval(fun, inputs, x_shapes, result_shapes, results); - break; - case element::Type_t::u8: - test_eval(fun, inputs, x_shapes, result_shapes, results); - break; - case element::Type_t::u16: - test_eval(fun, inputs, x_shapes, result_shapes, results); - break; - case element::Type_t::u32: - test_eval(fun, inputs, x_shapes, result_shapes, results); - break; - case element::Type_t::u64: - test_eval(fun, inputs, x_shapes, result_shapes, results); - break; - default: NGRAPH_CHECK(false, "Invalid type"); break; - } - } -} TEST(eval, max_pool_v1_dynamic) { diff --git a/ngraph/test/op_eval/transpose.cpp b/ngraph/test/op_eval/transpose.cpp new file mode 100644 index 00000000000..addf68ed10a --- /dev/null +++ b/ngraph/test/op_eval/transpose.cpp @@ -0,0 +1,236 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "gtest/gtest.h" + +#include "ngraph/op/transpose.hpp" +#include "ngraph/runtime/reference/transpose.hpp" + +#include "ngraph/runtime/host_tensor.hpp" +#include "ngraph/validation_util.hpp" +#include "runtime/backend.hpp" +#include "util/test_tools.hpp" +#include "util/all_close_f.hpp" +#include "util/type_prop.hpp" + +using namespace std; +using namespace ngraph; + +template +void test_tranpose_eval(shared_ptr fun) +{ + using T = typename element_type_traits::value_type; + using T_AXIS = typename element_type_traits::value_type; + + const std::vector> input_data{ + {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}; + std::vector data_shapes{{2, 3}, {2, 3}, {2, 3}, {2, 3, 1}, {2, 2, 3}}; + const std::vector> axes_order{{0, 1}, {1, 0}, {}, {1, 2, 0}, {2, 1, 0}}; + + std::vector> expected_results{ + {1, 2, 3, 4, 5, 6}, {1, 4, 2, 5, 3, 6}, {1, 4, 2, 5, 3, 6}, {1, 4, 2, 5, 3, 6}, {1, 7, 4, 10, 2, 8, 5, 11, 3, 9, 6, 12}}; + std::vector expected_result_shapes{{2, 3}, {3, 2}, {3, 2}, {3, 1, 2}, {3, 2, 2}}; + + for (size_t i = 0; i < data_shapes.size(); i++) + { + auto result_tensor = make_shared(element::dynamic, PartialShape::dynamic()); + ASSERT_TRUE(fun->evaluate({result_tensor}, + {make_host_tensor(data_shapes[i], input_data[i]), + make_host_tensor(Shape{axes_order[i].size()}, axes_order[i])})); + + auto actual_results = read_vector(result_tensor); + ASSERT_EQ(actual_results, expected_results[i]); + + { // Temporary test for legacy reference function template + NGRAPH_SUPPRESS_DEPRECATED_START + std::vector ref_results(input_data[i].size()); + runtime::reference::transpose(input_data[i].data(), ref_results.data(), data_shapes[i], axes_order[i].data()); + ASSERT_EQ(ref_results, expected_results[i]); + NGRAPH_SUPPRESS_DEPRECATED_END + } + } +} + +TEST(op_eval, eval_transpose) +{ + vector> axes; + axes.push_back(make_shared(element::i8, PartialShape{Dimension::dynamic()})); + axes.push_back(make_shared(element::i16, PartialShape{Dimension::dynamic()})); + axes.push_back(make_shared(element::i32, PartialShape{Dimension::dynamic()})); + axes.push_back(make_shared(element::i64, PartialShape{Dimension::dynamic()})); + + axes.push_back(make_shared(element::u8, PartialShape{Dimension::dynamic()})); + axes.push_back(make_shared(element::u16, 
PartialShape{Dimension::dynamic()})); + axes.push_back(make_shared(element::u32, PartialShape{Dimension::dynamic()})); + axes.push_back(make_shared(element::u64, PartialShape{Dimension::dynamic()})); + + for (auto& axis : axes) + { + const auto input_integral = make_shared(element::i16, PartialShape::dynamic()); + const auto transpose_integral = make_shared(input_integral, axis); + const auto function_integral = make_shared(OutputVector{transpose_integral}, ParameterVector{input_integral, axis}); + + const auto input_floating = make_shared(element::f32, PartialShape::dynamic()); + const auto transpose_floating = make_shared(input_floating, axis); + const auto function_floating = make_shared(OutputVector{transpose_floating}, ParameterVector{input_floating, axis}); + + switch (axis->get_element_type()) + { + case element::Type_t::i8: + test_tranpose_eval(function_integral); + test_tranpose_eval(function_floating); + break; + case element::Type_t::i16: + test_tranpose_eval(function_integral); + test_tranpose_eval(function_floating); + break; + case element::Type_t::i32: + test_tranpose_eval(function_integral); + test_tranpose_eval(function_floating); + break; + case element::Type_t::i64: + test_tranpose_eval(function_integral); + test_tranpose_eval(function_floating); + break; + case element::Type_t::u8: + test_tranpose_eval(function_integral); + test_tranpose_eval(function_floating); + break; + case element::Type_t::u16: + test_tranpose_eval(function_integral); + test_tranpose_eval(function_floating); + break; + case element::Type_t::u32: + test_tranpose_eval(function_integral); + test_tranpose_eval(function_floating); + break; + case element::Type_t::u64: + test_tranpose_eval(function_integral); + test_tranpose_eval(function_floating); + break; + default: NGRAPH_CHECK(false, "Invalid type"); break; + } + } +} + +TEST(op_eval, eval_axes_transpose) +{ + auto data_param = make_shared(element::i32, PartialShape::dynamic()); + auto axes_order = make_shared(element::i32, PartialShape{Dimension::dynamic()}); + + auto x_transpose = make_shared(data_param, axes_order); + auto function = make_shared(NodeVector{x_transpose}, ParameterVector{data_param, axes_order}); + + const std::vector data{1, 2, 3, 4, 5, 6}; + std::vector data_shape{2, 3, 1}; + const std::vector perm{1, 2, 0}; + std::vector expected_result{1, 4, 2, 5, 3, 6}; + + auto result_tensor = make_shared(); + function->evaluate({result_tensor}, + {make_host_tensor(data_shape, data), + make_host_tensor(Shape{perm.size()}, perm)}); + + auto actual_results = read_vector(result_tensor); + ASSERT_EQ(actual_results, expected_result); +} + +TEST(op_eval, eval_duplicated_axes_transpose) +{ + auto data_param = make_shared(element::f32, PartialShape::dynamic()); + auto axes_order = make_shared(element::i32, PartialShape{Dimension::dynamic()}); + + auto x_transpose = make_shared(data_param, axes_order); + auto function = make_shared(NodeVector{x_transpose}, ParameterVector{data_param, axes_order}); + + const std::vector data{1, 2, 3, 4, 5, 6}; + std::vector data_shape{2, 3, 1}; + const std::vector perm{2, 1, 2}; + try + { + auto result_tensor = make_shared(); + function->evaluate({result_tensor}, + {make_host_tensor(data_shape, data), + make_host_tensor(Shape{perm.size()}, perm)}); + + FAIL() << "Duplicated axes values not detected"; + } + catch (const ngraph_error& error) + { + EXPECT_HAS_SUBSTRING(error.what(), std::string("must be unique")); + } + catch (...) 
+ { + FAIL() << "Failed for unexpected reason"; + } +} + +TEST(op_eval, eval_out_of_shape_axes_transpose) +{ + auto data_param = make_shared(element::f32, PartialShape::dynamic()); + auto axes_order = make_shared(element::i32, PartialShape{Dimension::dynamic()}); + + auto x_transpose = make_shared(data_param, axes_order); + auto function = make_shared(NodeVector{x_transpose}, ParameterVector{data_param, axes_order}); + + const std::vector data{1, 2, 3, 4, 5, 6}; + std::vector data_shape{2, 3, 1}; + const std::vector perm{0, 1, 3}; + + try + { + auto result_tensor = make_shared(); + function->evaluate({result_tensor}, + {make_host_tensor(data_shape, data), + make_host_tensor(Shape{perm.size()}, perm)}); + + FAIL() << "Out of shape axes not detected"; + } + catch (const ngraph_error& error) + { + EXPECT_HAS_SUBSTRING(error.what(), std::string("out of shape")); + } + catch (...) + { + FAIL() << "Failed for unexpected reason"; + } +} + +TEST(op_eval, eval_negative_axes_transpose) +{ + auto data_param = make_shared(element::f32, PartialShape::dynamic()); + auto axes_order = make_shared(element::i32, PartialShape{Dimension::dynamic()}); + + auto x_transpose = make_shared(data_param, axes_order); + auto function = make_shared(NodeVector{x_transpose}, ParameterVector{data_param, axes_order}); + + const std::vector data{1, 2, 3, 4, 5, 6}; + std::vector data_shape{2, 3, 1}; + const std::vector perm{-1, -2, -3}; + std::vector expected_result{1, 4, 2, 5, 3, 6}; + + try + { + auto result_tensor = make_shared(); + function->evaluate({result_tensor}, + {make_host_tensor(data_shape, data), + make_host_tensor(Shape{perm.size()}, perm)}); + + auto actual_results = read_vector(result_tensor); + + ASSERT_EQ(actual_results, expected_result); + FAIL() << "Negative axes for Transpose were not supported before."; + } + catch (const ngraph_error& error) + { + EXPECT_HAS_SUBSTRING(error.what(), std::string("not supported")); + } + catch (...) 
+ { + FAIL() << "Failed for unexpected reason"; + } +} diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index ae984901d13..1797f6bfb36 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -866,8 +866,6 @@ broadcast_algo_matrix_stride_3 # Cannot find blob with name: Parameter_1 dyn_group_convolution_backprop_data -dynamic_transpose -transpose # todo: check negative indices implementation gather_2d_negative_and_positive_indices_axis_0_2d_input @@ -1151,6 +1149,8 @@ IE_CPU.onnx_model_reduce_sum_13_axes_as_input IE_CPU.onnx_model_reduce_sum_13_input_dynamic IE_CPU.onnx_model_reduce_sum_13_axes_empty_dynamic_rank_input IE_CPU.onnx_model_reduce_sum_dynamic_rank_input +IE_CPU.transpose_axes_parameter_static_shapes +IE_CPU.transpose_axes_parameter_dynamic_shapes # Axes has zero dimension which is not allowed IE_CPU.onnx_model_reduce_sum_13_axes_as_0_dim_input diff --git a/ngraph/test/type_prop/transpose.cpp b/ngraph/test/type_prop/transpose.cpp index 920ec4eb954..5b156bc480a 100644 --- a/ngraph/test/type_prop/transpose.cpp +++ b/ngraph/test/type_prop/transpose.cpp @@ -122,6 +122,17 @@ TEST(type_prop, transpose_arg_rank_static_dynamic_input_order_rank_dynamic_ok) EXPECT_TRUE(r->get_output_partial_shape(0).same_scheme(PartialShape::dynamic(4))); } +TEST(type_prop, transpose_dynamic_interval_input_data) +{ + auto arg = make_shared(element::f32, PartialShape{Dimension(4, 6), Dimension(2, 3), 8}); + auto input_order = make_shared(element::i64, Shape{3}); + + auto r = make_shared(arg, input_order); + + EXPECT_EQ(r->get_output_element_type(0), element::f32); + EXPECT_TRUE(r->get_output_partial_shape(0).same_scheme(PartialShape::dynamic(3))); +} + TEST(type_prop, transpose_arg_static_input_order_static_input_order_not_vector) { auto arg = make_shared(element::f32, PartialShape{2, 4, 6, 8}); From c97bb90a91a09d92755f5c594d91dfd9d7534fe3 Mon Sep 17 00:00:00 2001 From: Vladislav Volkov Date: Thu, 29 Apr 2021 07:33:21 +0300 Subject: [PATCH 03/73] Nested ITT counters lead to invalid performance measurement results (#5172) * Compile time enabling or disabling of first inference time counters * First inference time counters * Counters for validate_nodes_and_infer_types and check_all_parameters_registered removed from first inference time counters scope * Code style fix * Missing macro for CC and invalid domain names * Code style fix * Unused function warnings fixed --- cmake/features.cmake | 8 + .../src/inference_engine/CMakeLists.txt | 2 +- .../inference_engine/compilation_context.cpp | 4 +- .../src/inference_engine/ie_core.cpp | 18 +- .../src/inference_engine/ie_itt.hpp | 1 + .../inference_engine/ie_network_reader.cpp | 12 +- .../src/legacy_api/CMakeLists.txt | 2 + .../src/legacy_api/src/ie_legacy_itt.hpp | 1 + .../src/legacy_api/src/ie_util_internal.cpp | 2 +- .../src/lpt_itt.h | 27 +++ .../src/transformer.cpp | 16 ++ .../src/mkldnn_plugin/mkldnn_exec_network.cpp | 4 +- .../src/mkldnn_plugin/mkldnn_graph.cpp | 37 ++-- .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 29 ++- .../src/mkldnn_plugin/mkldnn_plugin.cpp | 8 +- .../src/readers/ir_reader/ie_ir_parser.cpp | 8 +- .../src/readers/ir_reader/ie_ir_reader.cpp | 13 +- .../src/runtime/reference/convert.cpp | 1 + ngraph/core/src/function.cpp | 7 +- ngraph/core/src/pass/graph_rewrite.cpp | 7 +- ngraph/core/src/pass/manager.cpp | 11 +- .../include/openvino/cc/selective_build.h | 21 +-- openvino/itt/CMakeLists.txt | 10 + openvino/itt/include/openvino/itt.hpp | 171 
+++++++++++++----- openvino/itt/src/itt.cpp | 2 +- openvino/pp/include/openvino/pp.hpp | 17 ++ 26 files changed, 308 insertions(+), 131 deletions(-) create mode 100644 inference-engine/src/low_precision_transformations/src/lpt_itt.h diff --git a/cmake/features.cmake b/cmake/features.cmake index 275961f1ad8..ef8c2fbfc45 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -10,6 +10,14 @@ ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON " ie_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF) +ie_option_enum(ENABLE_PROFILING_FILTER "Enable or disable ITT counter groups.\ +Supported values:\ + ALL - enable all ITT counters (default value)\ + FIRST_INFERENCE - enable only first inference time counters" ALL + ALLOWED_VALUES ALL FIRST_INFERENCE) + +ie_option (ENABLE_PROFILING_FIRST_INFERENCE "Build with ITT tracing of first inference time." ON) + ie_option (ENABLE_DOCS "Build docs using Doxygen" OFF) ie_option(ENABLE_TEMPLATE_PLUGIN "Register template plugin into plugins.xml" OFF) diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 7c9200b3ded..03f41078046 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -125,7 +125,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DI $ $) -target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TARGET_NAME}_reader_api) +target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TARGET_NAME}_reader_api openvino::itt) set_ie_threading_interface_for(${TARGET_NAME}_obj) if (TBBBIND_2_4_FOUND) diff --git a/inference-engine/src/inference_engine/compilation_context.cpp b/inference-engine/src/inference_engine/compilation_context.cpp index 1463dfc48be..bcbf8627ba0 100644 --- a/inference-engine/src/inference_engine/compilation_context.cpp +++ b/inference-engine/src/inference_engine/compilation_context.cpp @@ -87,7 +87,7 @@ std::string NetworkCompilationContext::calculateFileInfo(const std::string& file std::string NetworkCompilationContext::computeHash(const CNNNetwork& network, const std::map& compileOptions) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN"); OstreamHashWrapper xmlHash; OstreamHashWrapper binHash; std::ostream xml(&xmlHash); @@ -163,7 +163,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network, std::string NetworkCompilationContext::computeHash(const std::string& modelName, const std::map& compileOptions) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName"); size_t seed {}; try { seed = hash_combine(seed, FileUtils::absoluteFilePath(modelName)); diff --git a/inference-engine/src/inference_engine/ie_core.cpp b/inference-engine/src/inference_engine/ie_core.cpp index 94165f0887b..94047c8562f 100644 --- a/inference-engine/src/inference_engine/ie_core.cpp +++ b/inference-engine/src/inference_engine/ie_core.cpp @@ -228,7 +228,7 @@ class Core::Impl : public ICore { const std::string& blobID, const std::string& modelPath = std::string(), bool forceDisableCache = false) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::LoadNetworkImpl"); 
+ OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::LoadNetworkImpl"); ExecutableNetwork execNetwork; execNetwork = context ? plugin.LoadNetwork(network, context, parsedConfig) : plugin.LoadNetwork(network, parsedConfig); @@ -236,7 +236,7 @@ class Core::Impl : public ICore { if (!forceDisableCache && cacheManager && DeviceSupportsImportExport(plugin)) { try { // need to export network for further import from "cache" - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::Export"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Export"); cacheManager->writeCacheEntry(blobID, [&](std::ostream& networkStream) { networkStream << CompiledBlobHeader(GetInferenceEngineVersion()->buildNumber, NetworkCompilationContext::calculateFileInfo(modelPath)); @@ -263,7 +263,7 @@ class Core::Impl : public ICore { IE_ASSERT(cacheManager != nullptr); try { cacheManager->readCacheEntry(blobId, [&](std::istream &networkStream) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport"); try { CompiledBlobHeader header; networkStream >> header; @@ -434,19 +434,19 @@ public: } CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::ReadNetwork from file"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from file"); return details::ReadNetwork(modelPath, binPath, extensions); } CNNNetwork ReadNetwork(const std::string& model, const Blob::CPtr& weights) const override { - OV_ITT_SCOPED_TASK(itt::domains::IE, "Core::Impl::ReadNetwork from memory"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "Core::Impl::ReadNetwork from memory"); return details::ReadNetwork(model, weights, extensions); } // TODO: In future this method can be added to ICore interface ExecutableNetwork LoadNetwork(const CNNNetwork& network, const RemoteContext::Ptr& context, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext"); if (context == nullptr) { IE_THROW() << "Remote context is null"; } @@ -470,7 +470,7 @@ public: ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName, const std::map& config) override { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::CNN"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::CNN"); bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0; auto parsed = parseDeviceNameIntoConfig(deviceName, config); if (forceDisableCache) { @@ -497,7 +497,7 @@ public: // TODO: In future this method can be added to ICore interface ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::string& deviceName, const std::map& config) { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::LoadNetwork::Path"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path"); auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto plugin = GetCPPPluginByName(parsed._deviceName); ExecutableNetwork res; @@ -634,7 +634,7 @@ public: * @return Reference to a CPP plugin wrapper */ InferencePlugin GetCPPPluginByName(const std::string& deviceName) const { - OV_ITT_SCOPED_TASK(itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName"); + OV_ITT_SCOPE(FIRST_INFERENCE, 
itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName"); std::lock_guard lock(pluginsMutex); diff --git a/inference-engine/src/inference_engine/ie_itt.hpp b/inference-engine/src/inference_engine/ie_itt.hpp index 8bd72cfc215..343fdc110c1 100644 --- a/inference-engine/src/inference_engine/ie_itt.hpp +++ b/inference-engine/src/inference_engine/ie_itt.hpp @@ -16,6 +16,7 @@ namespace itt { namespace domains { OV_ITT_DOMAIN(IE); OV_ITT_DOMAIN(IE_LT); + OV_ITT_DOMAIN(IE_RT); } } } diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/ie_network_reader.cpp index 9dd7b7a0b41..b7bccd841e3 100644 --- a/inference-engine/src/inference_engine/ie_network_reader.cpp +++ b/inference-engine/src/inference_engine/ie_network_reader.cpp @@ -151,7 +151,6 @@ void assertIfIRv7LikeModel(std::istream & modelStream) { } // namespace CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector& exts) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "details::ReadNetwork"); // Register readers if it is needed registerReaders(); @@ -210,11 +209,13 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binStream.seekg(0, std::ios::beg); Blob::Ptr weights = make_shared_blob({Precision::U8, { fileSize }, C }); - weights->allocate(); - binStream.read(weights->buffer(), fileSize); - - binStream.close(); + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "ReadNetworkWeights"); + weights->allocate(); + binStream.read(weights->buffer(), fileSize); + binStream.close(); + } // read model with weights auto network = reader->read(modelStream, weights, exts); @@ -230,7 +231,6 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& } CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts) { - OV_ITT_SCOPED_TASK(itt::domains::IE, "details::ReadNetwork"); // Register readers if it is needed registerReaders(); std::istringstream modelStream(model); diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt index 5993b0ad312..f7de7ce20ec 100644 --- a/inference-engine/src/legacy_api/CMakeLists.txt +++ b/inference-engine/src/legacy_api/CMakeLists.txt @@ -50,6 +50,8 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE target_compile_definitions(${TARGET_NAME}_obj PRIVATE $) +target_link_libraries(${TARGET_NAME}_obj PRIVATE openvino::itt) + add_cpplint_target(${TARGET_NAME}_obj_cpplint FOR_TARGETS ${TARGET_NAME}_obj) # Create shared library diff --git a/inference-engine/src/legacy_api/src/ie_legacy_itt.hpp b/inference-engine/src/legacy_api/src/ie_legacy_itt.hpp index 203394e6eaf..91d7f09b3e6 100644 --- a/inference-engine/src/legacy_api/src/ie_legacy_itt.hpp +++ b/inference-engine/src/legacy_api/src/ie_legacy_itt.hpp @@ -15,6 +15,7 @@ namespace InferenceEngine { namespace itt { namespace domains { OV_ITT_DOMAIN(IELegacy); + OV_ITT_DOMAIN(IELegacy_LT); } } } diff --git a/inference-engine/src/legacy_api/src/ie_util_internal.cpp b/inference-engine/src/legacy_api/src/ie_util_internal.cpp index cbe1e3ec8ee..a5e39527b7b 100644 --- a/inference-engine/src/legacy_api/src/ie_util_internal.cpp +++ b/inference-engine/src/legacy_api/src/ie_util_internal.cpp @@ -148,7 +148,7 @@ CNNLayerPtr clonelayer(const CNNLayer& source) { } CNNNetwork cloneNetwork(const CNNNetwork& network) { - OV_ITT_SCOPED_TASK(itt::domains::IELegacy, "cloneNetwork"); + OV_ITT_SCOPE(FIRST_INFERENCE, 
itt::domains::IELegacy_LT, "cloneNetwork"); if (network.getFunction()) { return CNNNetwork(std::make_shared(network)); diff --git a/inference-engine/src/low_precision_transformations/src/lpt_itt.h b/inference-engine/src/low_precision_transformations/src/lpt_itt.h new file mode 100644 index 00000000000..5b3f1b524bc --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/lpt_itt.h @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief Defines openvino domains for tracing + * @file lpt_itt.h + */ + +#pragma once + +#include + +namespace ngraph { +namespace pass { +namespace low_precision { +namespace itt { +namespace domains { + +OV_ITT_DOMAIN(LPT); +OV_ITT_DOMAIN(LPT_LT); + +} // namespace domains +} // namespace itt +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp index 31ad8c8b498..eee96c562e7 100644 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp @@ -21,6 +21,8 @@ #include "ngraph/pass/constant_folding.hpp" #include "ngraph/opsets/opset6.hpp" +#include "lpt_itt.h" + // branch specific transformations #include "low_precision/concat.hpp" #include "low_precision/concat_multi_channels.hpp" @@ -360,6 +362,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { return; } + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::LPT_LT, "LowPrecisionTransformer", "transform"); + ngraph::pass::ConstantFolding constantFolding; constantFolding.run_on_function(network); @@ -368,12 +372,16 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { TransformationContext context(network); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "TypeRelaxedReplacer"); + // Extend necessary operations with polymorphic semantics { TypeRelaxedReplacer pass; pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "BranchSpecificTransformations"); + { // Branch specific transformations GraphRewrite pass; @@ -381,6 +389,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FakeQuantizeDecomposition"); + { // Step #1: FakeQuantize decomposition transformation execution GraphRewrite pass; @@ -388,6 +398,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "LayerTransformations"); + { // Step #2: layer transformations execution GraphRewrite pass; @@ -395,6 +407,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "CleanupTransformations"); + { // Step #3: cleanup transformations execution GraphRewrite pass; @@ -402,6 +416,8 @@ void LowPrecisionTransformer::transform(std::shared_ptr network) { pass.run_on_function(network); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "StandaloneCleanupTransformations"); + { // Step #4: standalone cleanup transformations execution diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp index 156317c1d73..f3acd6d5a71 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp +++ 
b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp @@ -44,7 +44,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, _cfg{cfg}, _name{network.getName()}, _numaNodesWeights(numaNodesWeights) { - OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet"); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet"); // we are cloning network if we have statistics and we can transform network. _clonedNetwork = cloneNetwork(network); @@ -98,7 +98,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, } } - OV_ITT_TASK_NEXT(taskChain, "createConstInputs"); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "createConstInputs"); auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, const std::vector& shape, const std::string& name) { LayerParams attrs = {layer->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()}; auto constLayer = std::make_shared(attrs); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index e9db4a3076e..d1507ad1dad 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -97,7 +97,7 @@ template void MKLDNNGraph::ApplyUnrollPasses(CNNNetwork&); template void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) { - OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph"); + OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph"); if (IsReady()) ForgetGraphData(); @@ -210,6 +210,7 @@ void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNEx } void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "MKLDNNGraph::Replicate", "CNNNetwork"); InputsDataMap inputs = network.getInputsInfo(); this->_name = network.getName(); @@ -234,6 +235,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana return -1; }; + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AllNodes"); + // Replicate All Nodes in topological order for (const auto layer : CNNNetSortTopologically(network)) { CNNLayerPtr _layer = layer; @@ -271,6 +274,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana } } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Outputs"); + OutputsDataMap outputs = network.getOutputsInfo(); for (const auto &output : outputs) { const auto data = output.second; @@ -293,6 +298,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana unused_data.erase(data); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AddStubs"); + // Add stub output node for unused data for (auto to_stub_data : unused_data) { auto parent_layer = getCreatorLayer(to_stub_data).lock(); @@ -309,6 +316,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana graphNodes.push_back(node); } + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Inputs"); + // Replicate input nodes for (const auto& input : inputs) { auto inputLayer = getCreatorLayer(input.second->getInputData()).lock(); @@ -384,7 +393,7 @@ void MKLDNNGraph::InitGraph() { } void MKLDNNGraph::SetOriginalLayerNames() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, 
"MKLDNNGraph::SetOriginalLayerNames"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames"); // Do it before cleanup. Because it will lose original layers information for (auto &graphNode : graphNodes) { @@ -409,14 +418,14 @@ void MKLDNNGraph::SetOriginalLayerNames() { } void MKLDNNGraph::InitNodes() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes"); for (auto &node : graphNodes) { node->init(); } } void MKLDNNGraph::InitDescriptors() { - OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare"); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "InitDescriptors", "Prepare"); for (auto &node : graphNodes) { if (node->getType() == Input && _meanImages.find(node->getName()) != _meanImages.end()) { @@ -424,18 +433,18 @@ void MKLDNNGraph::InitDescriptors() { if (inputNode) inputNode->withMeanImage(); } - OV_ITT_TASK_NEXT(taskChain, node->profiling.getSupportedDescriptors); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.getSupportedDescriptors); node->getSupportedDescriptors(); - OV_ITT_TASK_NEXT(taskChain, node->profiling.initSupportedPrimitiveDescriptors); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.initSupportedPrimitiveDescriptors); node->initSupportedPrimitiveDescriptors(); - OV_ITT_TASK_NEXT(taskChain, node->profiling.filterSupportedPrimitiveDescriptors); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.filterSupportedPrimitiveDescriptors); node->filterSupportedPrimitiveDescriptors(); } for (auto &node : graphNodes) { - OV_ITT_TASK_NEXT(taskChain, node->profiling.selectOptimalPrimitiveDescriptor); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, node->profiling.selectOptimalPrimitiveDescriptor); node->selectOptimalPrimitiveDescriptor(); } } @@ -443,13 +452,13 @@ void MKLDNNGraph::InitDescriptors() { void MKLDNNGraph::InitOptimalPrimitiveDescriptors() { OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::InitOptimalPrimitiveDescriptors"); for (auto &node : graphNodes) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, node->profiling.initOptimalPrimitiveDescriptor); node->initOptimalPrimitiveDescriptor(); } } void MKLDNNGraph::ExecuteConstantNodesOnly() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly"); mkldnn::stream stream(eng); using shared_memory_ptr = MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr; @@ -511,7 +520,7 @@ static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& c } void MKLDNNGraph::InitEdges() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges"); size_t numberOfEdges = graphEdges.size(); @@ -730,7 +739,7 @@ void MKLDNNGraph::AllocateWithReuse() { } void MKLDNNGraph::Allocate() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::Allocate"); // resolve edges. 
Define which will be a view on others // NeedAllocation - real blob @@ -750,7 +759,7 @@ void MKLDNNGraph::Allocate() { void MKLDNNGraph::CreatePrimitives() { OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::CreatePrimitives"); for (auto& node : graphNodes) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, node->profiling.createPrimitive); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, node->profiling.createPrimitive); node->createPrimitive(); } } @@ -888,7 +897,7 @@ void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector& sort } void MKLDNNGraph::SortTopologically() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SortTopologically"); std::vector unsorted; std::vector sorted; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 5452c2343a6..10c808ba2b2 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -50,88 +50,111 @@ using namespace InferenceEngine; MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {} void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations"); - + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "MergeTwoEqualScaleShifts"); MergeTwoEqualScaleShifts(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBroadcastAndEltwise"); FuseBroadcastAndEltwise(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseClampAndQuantize"); FuseClampAndQuantize(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseScaleShiftAndQuantize"); FuseScaleShiftAndQuantize(graph); graph.RemoveDroppedNodes(); MergeGroupConvolution(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndZeroPoints"); FuseConvolutionAndZeroPoints(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); FuseConvolutionAndDepthwise(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndActivation"); FuseConvolutionAndActivation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); FuseConvolutionAndDepthwise(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndQuantize"); FuseConvolutionAndQuantize(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.SortTopologically(); graph.RemoveDroppedEdges(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); FuseConvolutionAndDepthwise(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePoolingAndQuantize"); FusePoolingAndQuantize(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.SortTopologically(); graph.RemoveDroppedEdges(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDWConvolution"); FuseConvolutionAndDWConvolution(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, 
"FuseBinaryConvolutionAndQuantize"); FuseBinaryConvolutionAndQuantize(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBatchNormWithScale"); FuseBatchNormWithScale(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveIdentityOperator"); RemoveIdentityOperator(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionSumAndConvolutionSumActivation"); FuseConvolutionSumAndConvolutionSumActivation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperation"); FuseConvolutionAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFullyConnectedAndSimpleOperation"); FuseFullyConnectedAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMVNAndSimpleOperation"); FuseMVNAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseInterpolateAndSimpleOperation"); FuseInterpolateAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseNormalizeAndSimpleOperation"); FuseNormalizeAndSimpleOperation(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseEltwiseAndSimple"); FuseEltwiseAndSimple(graph); graph.RemoveDroppedNodes(); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.RemoveDroppedEdges(); } void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); RemoveIOScaleShifts(graph); graph.RemoveDroppedNodes(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 7f043c40361..95302f9d442 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -299,7 +299,7 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { using namespace ngraph::pass::low_precision; if (useLpt) { - OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations"); + OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations"); ngraph::pass::Manager manager; auto lptPrerequisites = manager.register_pass(); @@ -363,11 +363,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { legacyManager.run_passes(nGraphFunc); - OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork"); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork"); clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize)); - OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision"); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConvertIOPrecision"); // WA: after conversion to CNNNetwork user precision can redefine input/output precisions // so we need to apply additional precision conversion but only for inputs and outputs @@ -423,7 +423,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std 
IE_SUPPRESS_DEPRECATED_END auto implNetwork = std::dynamic_pointer_cast(icnnnet); if (implNetwork) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding"); // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network ConstTransformer transformator(implNetwork.get()); transformator.fullTrim(); diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index 919ecbddfe1..0c29f342600 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -545,7 +545,7 @@ void XmlDeserializer::on_adapter( std::shared_ptr XmlDeserializer::parse_function( const pugi::xml_node& root, const Blob::CPtr& weights) { - OV_ITT_TASK_CHAIN(taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse"); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse"); struct FunctionNodes { ngraph::ParameterVector parameters; @@ -604,7 +604,7 @@ std::shared_ptr XmlDeserializer::parse_function( }; std::for_each(outputs.begin(), outputs.end(), dfs); - OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphNodes"); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphNodes"); FunctionNodes func_nodes; @@ -665,7 +665,7 @@ std::shared_ptr XmlDeserializer::parse_function( func_nodes.all.emplace_back(node); } - OV_ITT_TASK_NEXT(taskChain, "ConstructNgraphFunction"); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphFunction"); auto function = std::make_shared( func_nodes.results, func_nodes.sinks, func_nodes.parameters, GetStrAttr(root, "name", "")); @@ -876,7 +876,7 @@ std::shared_ptr V10Parser::parse( XmlDeserializer visitor(root, weights, opsets, variables); visitor.on_attribute("net", function); - OV_ITT_SCOPED_TASK(itt::domains::V10Reader_RT, "ConstructCNNNetwork"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "ConstructCNNNetwork"); CNNNetwork net(function, _exts); parsePreProcess(net, root, weights); diff --git a/inference-engine/src/readers/ir_reader/ie_ir_reader.cpp b/inference-engine/src/readers/ir_reader/ie_ir_reader.cpp index 9d659e1c5db..4133f7073ef 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_reader.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_reader.cpp @@ -33,14 +33,19 @@ CNNNetwork IRReader::read(std::istream& model, const std::vector& return read(model, nullptr, exts); } -CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const std::vector& exts) const { - OV_ITT_SCOPED_TASK(itt::domains::V10Reader, "IRReader::read"); - - pugi::xml_document xmlDoc; +static void loadXml(pugi::xml_document &xmlDoc, std::istream& model) { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "loadXml"); pugi::xml_parse_result res = xmlDoc.load(model); if (res.status != pugi::status_ok) { IE_THROW() << res.description() << "at offset " << res.offset; } +} + +CNNNetwork IRReader::read(std::istream& model, const Blob::CPtr& weights, const std::vector& exts) const { + OV_ITT_SCOPED_TASK(itt::domains::V10Reader, "IRReader::read"); + + pugi::xml_document xmlDoc; + loadXml(xmlDoc, model); pugi::xml_node root = xmlDoc.document_element(); auto version = details::GetIRVersion(root); diff --git a/ngraph/core/reference/src/runtime/reference/convert.cpp b/ngraph/core/reference/src/runtime/reference/convert.cpp index d87567b9b43..809c9ca3dc5 100644 --- 
a/ngraph/core/reference/src/runtime/reference/convert.cpp +++ b/ngraph/core/reference/src/runtime/reference/convert.cpp @@ -30,6 +30,7 @@ namespace ngraph gen.vpmovzxbd(i32vec, u8vec); gen.vcvtdq2ps(fvec, i32vec); gen.vcvtps2ph(f16vec, fvec, 0); + gen.vzeroupper(); gen.movdqu(gen.xword[dst], f16vec); } diff --git a/ngraph/core/src/function.cpp b/ngraph/core/src/function.cpp index b2d831470c6..10db441ec58 100644 --- a/ngraph/core/src/function.cpp +++ b/ngraph/core/src/function.cpp @@ -88,8 +88,8 @@ Function::Function(const OutputVector& results, void Function::check_all_parameters_registered() const { - OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT, - "Function::check_all_parameters_registered"); + OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraph, "Function::check_all_parameters_registered"); + std::stringstream unregistered_parameters; for (auto& node : get_ordered_ops()) { @@ -104,8 +104,7 @@ void Function::check_all_parameters_registered() const void Function::validate_nodes_and_infer_types() const { - OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraphPass_LT, - "Function::validate_nodes_and_infer_types"); + OV_ITT_SCOPED_TASK(ngraph::itt::domains::nGraph, "Function::validate_nodes_and_infer_types"); struct Counter { diff --git a/ngraph/core/src/pass/graph_rewrite.cpp b/ngraph/core/src/pass/graph_rewrite.cpp index 693dd8e226c..75e6b7ca9ae 100644 --- a/ngraph/core/src/pass/graph_rewrite.cpp +++ b/ngraph/core/src/pass/graph_rewrite.cpp @@ -62,14 +62,14 @@ namespace ngraph { namespace pass { - namespace + namespace internal { PerfCounters& perf_counters_graph_rewrite() { static PerfCounters counters; return counters; } - } // namespace + } // namespace internal } // namespace pass } // namespace ngraph @@ -428,7 +428,8 @@ void ngraph::pass::MatcherPass::register_matcher(const std::shared_ptr node) { - OV_ITT_SCOPED_TASK(itt::domains::nGraph, pass::perf_counters_graph_rewrite()[get_type_info()]); + OV_ITT_SCOPED_TASK(itt::domains::nGraph, + pass::internal::perf_counters_graph_rewrite()[get_type_info()]); m_new_nodes.clear(); if (m_handler) return m_handler(node); diff --git a/ngraph/core/src/pass/manager.cpp b/ngraph/core/src/pass/manager.cpp index c04cc7f1331..59f728f09cf 100644 --- a/ngraph/core/src/pass/manager.cpp +++ b/ngraph/core/src/pass/manager.cpp @@ -29,14 +29,14 @@ namespace ngraph { namespace pass { - namespace + namespace internal { - PerfCounters& perf_counters_manager() + PerfCounters& perf_counters() { static PerfCounters counters; return counters; } - } // namespace + } // namespace internal } // namespace pass } // namespace ngraph @@ -72,8 +72,9 @@ void pass::Manager::run_passes(shared_ptr func) continue; } - OV_ITT_SCOPED_TASK(itt::domains::nGraphPass_LT, - pass::perf_counters_manager()[pass->get_type_info()]); + OV_ITT_SCOPE(FIRST_INFERENCE, + itt::domains::nGraphPass_LT, + pass::internal::perf_counters()[pass->get_type_info()]); pass_timer.start(); diff --git a/openvino/conditional_compilation/include/openvino/cc/selective_build.h b/openvino/conditional_compilation/include/openvino/cc/selective_build.h index 15a7b70805b..756179695a3 100644 --- a/openvino/conditional_compilation/include/openvino/cc/selective_build.h +++ b/openvino/conditional_compilation/include/openvino/cc/selective_build.h @@ -187,25 +187,10 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs, Cases&&... 
case #define OV_CC_DOMAINS(Module) -// Placeholder for first macro argument -#define OV_CC_SCOPE_ARG_PLACEHOLDER_1 0, - -// This macro returns second argument, first argument is ignored -#define OV_CC_SCOPE_SECOND_ARG(...) OV_PP_EXPAND(OV_CC_SCOPE_SECOND_ARG_(__VA_ARGS__, 0)) -#define OV_CC_SCOPE_SECOND_ARG_(...) OV_PP_EXPAND(OV_CC_SCOPE_SECOND_ARG_GET(__VA_ARGS__)) -#define OV_CC_SCOPE_SECOND_ARG_GET(ignored, val, ...) val - -// Return macro argument value -#define OV_CC_SCOPE_IS_ENABLED(x) OV_CC_SCOPE_IS_ENABLED1(x) - -// Generate junk macro or {0, } sequence if val is 1 -#define OV_CC_SCOPE_IS_ENABLED1(val) OV_CC_SCOPE_IS_ENABLED2(OV_PP_CAT(OV_CC_SCOPE_ARG_PLACEHOLDER_, val)) - -// Return second argument from possible sequences {1, 0}, {0, 1, 0} -#define OV_CC_SCOPE_IS_ENABLED2(arg1_or_junk) OV_CC_SCOPE_SECOND_ARG(arg1_or_junk 1, 0) +#define OV_CC_SCOPE_IS_ENABLED OV_PP_IS_ENABLED #define OV_SCOPE(Module, region) \ - for (bool ovCCScopeIsEnabled = OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, region)); ovCCScopeIsEnabled; ovCCScopeIsEnabled = false) + for (bool ovCCScopeIsEnabled = OV_PP_IS_ENABLED(OV_PP_CAT3(Module, _, region)); ovCCScopeIsEnabled; ovCCScopeIsEnabled = false) // Switch is disabled #define OV_CC_SWITCH_0(Module, fn, ctx, val) @@ -214,7 +199,7 @@ bool match(char const *region, Ctx && ctx, T && val, Case && cs, Cases&&... case #define OV_CC_SWITCH_1(Module, fn, ctx, val) openvino::cc::internal::match(ctx, val, OV_PP_CAT4(Module, _, fn, _cases)); #define OV_SWITCH(Module, fn, ctx, val, ...) \ - OV_PP_EXPAND(OV_PP_CAT(OV_CC_SWITCH_, OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, fn)))(Module, fn, ctx, val)) + OV_PP_EXPAND(OV_PP_CAT(OV_CC_SWITCH_, OV_PP_IS_ENABLED(OV_PP_CAT3(Module, _, fn)))(Module, fn, ctx, val)) #define OV_CASE(Case, Type) openvino::cc::internal::make_case_wrapper(Case) diff --git a/openvino/itt/CMakeLists.txt b/openvino/itt/CMakeLists.txt index 0f9437c9f57..e9f880b8ce7 100644 --- a/openvino/itt/CMakeLists.txt +++ b/openvino/itt/CMakeLists.txt @@ -14,6 +14,16 @@ target_link_libraries(${TARGET_NAME} PUBLIC openvino::pp) if(TARGET ittnotify) target_link_libraries(${TARGET_NAME} PUBLIC ittnotify) + if(ENABLE_PROFILING_FILTER STREQUAL "ALL") + target_compile_definitions(${TARGET_NAME} PUBLIC + ENABLE_PROFILING_ALL + ENABLE_PROFILING_FIRST_INFERENCE) + elseif(ENABLE_PROFILING_FILTER STREQUAL "FIRST_INFERENCE") + target_compile_definitions(${TARGET_NAME} PUBLIC + ENABLE_PROFILING_FIRST_INFERENCE) + else() + message(FATAL_ERROR "The ${ENABLE_PROFILING_FILTER} profiling filter isn't supported") + endif() endif() if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") diff --git a/openvino/itt/include/openvino/itt.hpp b/openvino/itt/include/openvino/itt.hpp index d950664ea80..8a22efa54fe 100644 --- a/openvino/itt/include/openvino/itt.hpp +++ b/openvino/itt/include/openvino/itt.hpp @@ -214,6 +214,8 @@ namespace openvino */ #define OV_ITT_DOMAIN(...) OV_PP_OVERLOAD(OV_ITT_DOMAIN, __VA_ARGS__) +#define OV_ITT_GROUP(group) OV_PP_CAT(ENABLE_PROFILING_, group) + /** * @cond */ @@ -232,6 +234,37 @@ inline openvino::itt::domain_t domainName() noexcept return d; \ } +/** + * @endcond + */ + +/** + * @def OV_ITT_SCOPE(domain, handleOrTaskName) + * @ingroup ie_dev_profiling + * @brief Annotate section of code till scope exit to be profiled using known @p handle or @p taskName as section id. + * @details In case if handle or taskName absent, the current function name is used. + * @param group [in] ITT counter group name used for enabling/disabling at compile time. 
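+ *                  The group maps to the ENABLE_PROFILING_ALL / ENABLE_PROFILING_FIRST_INFERENCE compile
+ *                  definitions; the groups used in this patch are ALL and FIRST_INFERENCE.
+ *                  Illustrative usage, mirroring a call added elsewhere in this patch (the domain and
+ *                  task name are examples only, not a prescription):
+ *                  OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_RT, "ReadNetworkWeights");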
+ * @param domainName [in] Known at compile time name of module or library (the domain name).
+ * @param handleOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
+ */
+#define OV_ITT_SCOPE(group, ...) \
+    OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__))
+
+/**
+ * @cond
+ */
+
+#define OV_ITT_SCOPE_IMPL_0(...)
+#define OV_ITT_SCOPE_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE, __VA_ARGS__)
+
+#define OV_ITT_SCOPE_1(domain) \
+    openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
+        (openvino::itt::handle(ITT_FUNCTION_NAME));
+
+#define OV_ITT_SCOPE_2(domain, taskOrTaskName) \
+    openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
+        (openvino::itt::handle(taskOrTaskName));
+
 /**
  * @endcond
  */
@@ -244,19 +277,97 @@ inline openvino::itt::domain_t domainName() noexcept
  * @param domainName [in] Known at compile time name of module or library (the domain name).
  * @param handleOrTaskName [in] The annotation name or handle for section of code. Parameter is optional.
  */
-#define OV_ITT_SCOPED_TASK(...) OV_PP_OVERLOAD(OV_ITT_SCOPED_TASK, __VA_ARGS__)
+#define OV_ITT_SCOPED_TASK(...) OV_ITT_SCOPE(ALL, __VA_ARGS__)
+
+/**
+ * @def OV_ITT_SCOPE_CHAIN(group, chainId, domain, prefix, taskName)
+ * @ingroup ie_dev_profiling
+ * @brief Begins a sequence of annotated sections of code using @p prefix and @p taskName as section id.
+ * @details If prefix is absent, the current function name is used;
+ *          if taskName is absent, the first chain index is used, i.e. 1.
+ * @param group [in] ITT counter group name used for enabling/disabling at compile time.
+ * @param chainId [in] The tasks chain identifier.
+ * @param domainName [in] Known at compile time name of module or library (the domain name).
+ * @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional.
+ * @param taskName [in] The annotation name for section of code. Parameter is optional.
+ */
+#define OV_ITT_SCOPE_CHAIN(group, ...) \
+    OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_CHAIN_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__))

 /**
  * @cond
  */

-#define OV_ITT_SCOPED_TASK_1(domain) \
-    openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
-        (openvino::itt::handle(ITT_FUNCTION_NAME));
+#define OV_ITT_SCOPE_CHAIN_IMPL_0(...)
+#define OV_ITT_SCOPE_CHAIN_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE_CHAIN, __VA_ARGS__)

-#define OV_ITT_SCOPED_TASK_2(domain, taskOrTaskName) \
-    openvino::itt::ScopedTask<domain> OV_PP_CAT(ittScopedTask, __LINE__) \
-        (openvino::itt::handle(taskOrTaskName));
+#define OV_ITT_SCOPE_CHAIN_2(chainId, domain) \
+    openvino::itt::TaskChain<domain> chainId \
+        (openvino::itt::handle \
+            (std::string(ITT_FUNCTION_NAME) + "_1"), \
+        ITT_FUNCTION_NAME);
+
+#define OV_ITT_SCOPE_CHAIN_3(chainId, domain, prefix) \
+    openvino::itt::TaskChain<domain> chainId \
+        (openvino::itt::handle \
+            (std::string(prefix) + "_1"), \
+        prefix);
+
+#define OV_ITT_SCOPE_CHAIN_4(chainId, domain, prefix, taskName) \
+    openvino::itt::TaskChain<domain> chainId \
+        (openvino::itt::handle \
+            (std::string(prefix) + "_" + taskName), \
+        prefix);
+
+/**
+ * @endcond
+ */
+
+/**
+ * @def OV_ITT_SCOPE_NEXT(group, chainId, taskName)
+ * @ingroup ie_dev_profiling
+ * @brief Inserts a new annotated section of code into the tasks chain using @p taskName as section id.
+ * @details If taskName is missing, the current chain index is used.
+ * @param group [in] ITT counter group name used for enabling/disabling at compile time.
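+ *                  Illustrative pairing with a chain opened by OV_ITT_SCOPE_CHAIN, taken from usage added
+ *                  elsewhere in this patch (the chain, domain and task names are examples only):
+ *                  OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::V10Reader_RT, "V10Parser", "Parse");
+ *                  OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConstructNgraphNodes");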
+ * @param chainId [in] The tasks chain identifier. + * @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional. + */ +#define OV_ITT_SCOPE_NEXT(group, ...) \ + OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_NEXT_IMPL_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(__VA_ARGS__)) + +/** + * @cond + */ + +#define OV_ITT_SCOPE_NEXT_IMPL_0(...) +#define OV_ITT_SCOPE_NEXT_IMPL_1(...) OV_PP_OVERLOAD(OV_ITT_SCOPE_NEXT, __VA_ARGS__) + +#define OV_ITT_SCOPE_NEXT_1(chainId) \ + chainId.next(openvino::itt::handle(chainId.taskName())); + +#define OV_ITT_SCOPE_NEXT_2(chainId, taskOrTaskName) \ + chainId.next(openvino::itt::handle(chainId.taskNameOrHandle(taskOrTaskName))); + +/** + * @endcond + */ + +/** + * @def OV_ITT_SCOPE_SKIP(group, chainId) + * @ingroup ie_dev_profiling + * @brief Skips the remaining task scope. + * @param group [in] ITT counter group name used for enabling/disabling at compile time. + * @param chainId [in] The tasks chain identifier. + */ +#define OV_ITT_SCOPE_SKIP(group, chainId) \ + OV_PP_EXPAND(OV_PP_CAT(OV_ITT_SCOPE_SKIP_, OV_PP_IS_ENABLED(OV_ITT_GROUP(group)))(chainId)) + +/** + * @cond + */ + +#define OV_ITT_SCOPE_SKIP_0(chainId) +#define OV_ITT_SCOPE_SKIP_1(chainId) chainId.skip(); /** * @endcond @@ -273,33 +384,7 @@ inline openvino::itt::domain_t domainName() noexcept * @param prefix [in] The task chain name prefix. The task name starts with this prefix. Parameter is optional. * @param taskName [in] The annotation name for section of code. Parameter is optional. */ -#define OV_ITT_TASK_CHAIN(...) OV_PP_OVERLOAD(OV_ITT_TASK_CHAIN, __VA_ARGS__) - -/** - * @cond - */ - -#define OV_ITT_TASK_CHAIN_2(chainId, domain) \ - openvino::itt::TaskChain chainId \ - (openvino::itt::handle \ - (std::string(ITT_FUNCTION_NAME) + "_1"), \ - ITT_FUNCTION_NAME); - -#define OV_ITT_TASK_CHAIN_3(chainId, domain, prefix) \ - openvino::itt::TaskChain chainId \ - (openvino::itt::handle \ - (std::string(prefix) + "_1"), \ - prefix); - -#define OV_ITT_TASK_CHAIN_4(chainId, domain, prefix, taskName) \ - openvino::itt::TaskChain chainId \ - (openvino::itt::handle \ - (std::string(prefix) + "_" + taskName), \ - prefix); - -/** - * @endcond - */ +#define OV_ITT_TASK_CHAIN(...) OV_ITT_SCOPE_CHAIN(ALL, __VA_ARGS__) /** * @def OV_ITT_TASK_NEXT(chainId, taskName) @@ -309,21 +394,7 @@ inline openvino::itt::domain_t domainName() noexcept * @param chainId [in] The tasks chain identifier. * @param taskOrTaskName [in] The annotation name or handle for section of code. Parameter is optional. */ -#define OV_ITT_TASK_NEXT(...) OV_PP_OVERLOAD(OV_ITT_TASK_NEXT, __VA_ARGS__) - -/** - * @cond - */ - -#define OV_ITT_TASK_NEXT_1(chainId) \ - chainId.next(openvino::itt::handle(chainId.taskName())); - -#define OV_ITT_TASK_NEXT_2(chainId, taskOrTaskName) \ - chainId.next(openvino::itt::handle(chainId.taskNameOrHandle(taskOrTaskName))); - -/** - * @endcond - */ +#define OV_ITT_TASK_NEXT(...) OV_ITT_SCOPE_NEXT(ALL, __VA_ARGS__) /** * @def OV_ITT_TASK_SKIP(chainId) @@ -331,7 +402,7 @@ inline openvino::itt::domain_t domainName() noexcept * @brief Skips the remaining task scope. * @param chainId [in] The tasks chain identifier. 
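 *                 (For reference: OV_ITT_SCOPE_SKIP(group, chainId) is the group-aware variant;
 *                 this macro forwards to it with the ALL group, as the definition below shows.)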
*/ -#define OV_ITT_TASK_SKIP(chainId) chainId.skip(); +#define OV_ITT_TASK_SKIP(chainId) OV_ITT_SCOPE_SKIP(ALL, chainId); } // namespace itt } // namespace openvino diff --git a/openvino/itt/src/itt.cpp b/openvino/itt/src/itt.cpp index 5fa17f81213..de3fa04da8e 100644 --- a/openvino/itt/src/itt.cpp +++ b/openvino/itt/src/itt.cpp @@ -40,7 +40,7 @@ void taskBegin(domain_t d, handle_t t) { } void taskEnd(domain_t d) { - if (!callStackDepth() || call_stack_depth-- > 0) + if (!callStackDepth() || --call_stack_depth < callStackDepth()) __itt_task_end(reinterpret_cast<__itt_domain*>(d)); } diff --git a/openvino/pp/include/openvino/pp.hpp b/openvino/pp/include/openvino/pp.hpp index f54e6e1b015..6931c8f1845 100644 --- a/openvino/pp/include/openvino/pp.hpp +++ b/openvino/pp/include/openvino/pp.hpp @@ -30,3 +30,20 @@ #define OV_PP_CAT4(x, y, z, w) OV_PP_CAT4_(x, y, z, w) #define OV_PP_OVERLOAD(NAME, ...) OV_PP_EXPAND( OV_PP_CAT3(NAME, _, OV_PP_EXPAND( OV_PP_NARG(OV_PP_NO_ARGS __VA_ARGS__ (NAME)) ))(__VA_ARGS__) ) + +// Placeholder for first macro argument +#define OV_PP_ARG_PLACEHOLDER_1 0, + +// This macro returns second argument, first argument is ignored +#define OV_PP_SECOND_ARG(...) OV_PP_EXPAND(OV_PP_SECOND_ARG_(__VA_ARGS__, 0)) +#define OV_PP_SECOND_ARG_(...) OV_PP_EXPAND(OV_PP_SECOND_ARG_GET(__VA_ARGS__)) +#define OV_PP_SECOND_ARG_GET(ignored, val, ...) val + +// Return macro argument value +#define OV_PP_IS_ENABLED(x) OV_PP_IS_ENABLED1(x) + +// Generate junk macro or {0, } sequence if val is 1 +#define OV_PP_IS_ENABLED1(val) OV_PP_IS_ENABLED2(OV_PP_CAT(OV_PP_ARG_PLACEHOLDER_, val)) + +// Return second argument from possible sequences {1, 0}, {0, 1, 0} +#define OV_PP_IS_ENABLED2(arg1_or_junk) OV_PP_SECOND_ARG(arg1_or_junk 1, 0) From 64a032fa18be4dbbb511519c523fddc1087414c4 Mon Sep 17 00:00:00 2001 From: Tomasz Socha Date: Thu, 29 Apr 2021 06:49:05 +0200 Subject: [PATCH 04/73] [ONNX] Add ONNX Compress operator. 
(#5400) * [ONNX] Add support for ONNX's Compress operator * NonZero fixes * Add unit tests as constant_network * Style * Move compress tests * xfail python tests * Fix func test * Review Fix I --- .../transformations/nop_elimination.cpp | 2 +- ngraph/core/src/op/non_zero.cpp | 7 +- .../include/onnx_editor/editor_types.hpp | 4 +- .../frontend/onnx_import/src/op/compress.cpp | 51 ++++++++++ .../frontend/onnx_import/src/op/compress.hpp | 23 +++++ .../frontend/onnx_import/src/op/non_zero.cpp | 6 +- .../frontend/onnx_import/src/ops_bridge.cpp | 2 + ngraph/python/tests/__init__.py | 2 - ngraph/python/tests/test_onnx/test_backend.py | 10 +- ngraph/test/CMakeLists.txt | 4 +- ngraph/test/models/onnx/compress_0.prototxt | 64 ++++++++++++ ngraph/test/models/onnx/compress_1.prototxt | 64 ++++++++++++ .../onnx/compress_default_axis.prototxt | 56 +++++++++++ .../onnx/compress_negative_axis.prototxt | 64 ++++++++++++ .../test/onnx/onnx_import_with_editor.in.cpp | 98 +++++++++++++++++++ 15 files changed, 443 insertions(+), 14 deletions(-) create mode 100644 ngraph/frontend/onnx_import/src/op/compress.cpp create mode 100644 ngraph/frontend/onnx_import/src/op/compress.hpp create mode 100644 ngraph/test/models/onnx/compress_0.prototxt create mode 100644 ngraph/test/models/onnx/compress_1.prototxt create mode 100644 ngraph/test/models/onnx/compress_default_axis.prototxt create mode 100644 ngraph/test/models/onnx/compress_negative_axis.prototxt create mode 100644 ngraph/test/onnx/onnx_import_with_editor.in.cpp diff --git a/inference-engine/tests/functional/inference_engine/transformations/nop_elimination.cpp b/inference-engine/tests/functional/inference_engine/transformations/nop_elimination.cpp index e5afcc84454..8212cb4fb45 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/nop_elimination.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/nop_elimination.cpp @@ -40,7 +40,7 @@ TEST(nop_elimination, eliminate_convert) { TEST(nop_elimination, convert_type_agnostic) { Shape shape{}; - auto type = element::from(); + auto type = element::from(); auto A = make_shared(type, shape); auto c1 = make_shared(A, element::from()); auto c = make_shared(c1, element::f32); diff --git a/ngraph/core/src/op/non_zero.cpp b/ngraph/core/src/op/non_zero.cpp index db7998f718a..19e52f77fe9 100644 --- a/ngraph/core/src/op/non_zero.cpp +++ b/ngraph/core/src/op/non_zero.cpp @@ -51,7 +51,7 @@ void op::v3::NonZero::validate_and_infer_types() const auto input_et = get_input_element_type(0); NODE_VALIDATION_CHECK(this, - input_et.is_integral() || input_et.is_real(), + input_et.is_integral_number() || input_et.is_real(), "NonZero input data type needs to be a numeric type. 
Got: ", input_et); NODE_VALIDATION_CHECK(this, @@ -154,13 +154,18 @@ namespace nonzero switch (input->get_element_type()) { + NGRAPH_TYPE_CASE(evaluate_nonzero, i8, input, output); + NGRAPH_TYPE_CASE(evaluate_nonzero, i16, input, output); NGRAPH_TYPE_CASE(evaluate_nonzero, i32, input, output); NGRAPH_TYPE_CASE(evaluate_nonzero, i64, input, output); NGRAPH_TYPE_CASE(evaluate_nonzero, u8, input, output); + NGRAPH_TYPE_CASE(evaluate_nonzero, u16, input, output); NGRAPH_TYPE_CASE(evaluate_nonzero, u32, input, output); NGRAPH_TYPE_CASE(evaluate_nonzero, u64, input, output); + NGRAPH_TYPE_CASE(evaluate_nonzero, bf16, input, output); NGRAPH_TYPE_CASE(evaluate_nonzero, f16, input, output); NGRAPH_TYPE_CASE(evaluate_nonzero, f32, input, output); + NGRAPH_TYPE_CASE(evaluate_nonzero, f64, input, output); default: rc = false; break; } return rc; diff --git a/ngraph/frontend/onnx_editor/include/onnx_editor/editor_types.hpp b/ngraph/frontend/onnx_editor/include/onnx_editor/editor_types.hpp index 69a663be617..56afa34af32 100644 --- a/ngraph/frontend/onnx_editor/include/onnx_editor/editor_types.hpp +++ b/ngraph/frontend/onnx_editor/include/onnx_editor/editor_types.hpp @@ -60,5 +60,5 @@ namespace ngraph /// OutputEdge(5, "out1") /// OutputEdge(5, "out2") using OutputEdge = Edge; - } -} + } // namespace onnx_editor +} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/op/compress.cpp b/ngraph/frontend/onnx_import/src/op/compress.cpp new file mode 100644 index 00000000000..f7658a5e7aa --- /dev/null +++ b/ngraph/frontend/onnx_import/src/op/compress.cpp @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "default_opset.hpp" +#include "ngraph/builder/reshape.hpp" +#include "op/compress.hpp" + +namespace ngraph +{ + namespace onnx_import + { + namespace op + { + namespace set_1 + { + OutputVector compress(const Node& node) + { + auto data = node.get_ng_inputs().at(0); + auto condition = std::make_shared( + node.get_ng_inputs().at(1), element::u8); + + int64_t axis = 0; + if (node.has_attribute("axis")) + { + axis = node.get_attribute_value("axis"); + } + else + { + data = std::make_shared( + ngraph::builder::opset1::flatten(data, axis)); + } + auto axis_node = default_opset::Constant::create(element::i64, Shape{}, {axis}); + auto zero_node = default_opset::Constant::create(element::i64, Shape{}, {0}); + auto result = std::make_shared( + data, + std::make_shared( + std::make_shared(condition), zero_node), + axis_node); + + return {result}; + } + } // namespace set_1 + + } // namespace op + + } // namespace onnx_import + +} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/op/compress.hpp b/ngraph/frontend/onnx_import/src/op/compress.hpp new file mode 100644 index 00000000000..9311a4725dc --- /dev/null +++ b/ngraph/frontend/onnx_import/src/op/compress.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/output_vector.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph +{ + namespace onnx_import + { + namespace op + { + namespace set_1 + { + OutputVector compress(const Node& node); + } + } // namespace op + } // namespace onnx_import + +} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/op/non_zero.cpp b/ngraph/frontend/onnx_import/src/op/non_zero.cpp index 550f24b50c3..5f580111e0d 100644 --- a/ngraph/frontend/onnx_import/src/op/non_zero.cpp +++ 
b/ngraph/frontend/onnx_import/src/op/non_zero.cpp @@ -17,7 +17,11 @@ namespace ngraph { OutputVector non_zero(const Node& node) { - const auto data = node.get_ng_inputs().at(0); + auto data = node.get_ng_inputs().at(0); + if (data.get_element_type() == element::boolean) + { + data = std::make_shared(data, element::u8); + } return {std::make_shared(data, element::i64)}; } diff --git a/ngraph/frontend/onnx_import/src/ops_bridge.cpp b/ngraph/frontend/onnx_import/src/ops_bridge.cpp index d4eccd61478..85d27a05aff 100644 --- a/ngraph/frontend/onnx_import/src/ops_bridge.cpp +++ b/ngraph/frontend/onnx_import/src/ops_bridge.cpp @@ -27,6 +27,7 @@ #include "op/cast.hpp" #include "op/ceil.hpp" #include "op/clip.hpp" +#include "op/compress.hpp" #include "op/concat.hpp" #include "op/constant.hpp" #include "op/constant_fill.hpp" @@ -326,6 +327,7 @@ namespace ngraph REGISTER_OPERATOR("Conv", 1, conv); // REGISTER_OPERATOR("ConvInteger", 1, conv_integer); REGISTER_OPERATOR("ConvTranspose", 1, conv_transpose); + REGISTER_OPERATOR("Compress", 1, compress); REGISTER_OPERATOR("Cos", 1, cos); REGISTER_OPERATOR("Cosh", 1, cosh); REGISTER_OPERATOR("ConstantFill", 1, constant_fill); diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 09a597a881f..1a5925e56bf 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -42,8 +42,6 @@ xfail_issue_33596 = xfail_test(reason="RuntimeError: nGraph does not support dif "SequenceEmpty, SequenceInsert, SequenceErase, SequenceLength ") xfail_issue_33606 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" "Det") -xfail_issue_33644 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" - "Compress") xfail_issue_33651 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" "TfIdfVectorizer") xfail_issue_33581 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index aa136fd1525..e508551b2c0 100644 --- a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -19,7 +19,6 @@ from tests import (BACKEND_NAME, xfail_issue_33596, xfail_issue_33606, xfail_issue_33633, - xfail_issue_33644, xfail_issue_33651, xfail_issue_38091, xfail_issue_38699, @@ -312,11 +311,6 @@ tests_expected_to_fail = [ (xfail_issue_33606, "OnnxBackendNodeModelTest.test_det_2d_cpu", "OnnxBackendNodeModelTest.test_det_nd_cpu"), - (xfail_issue_33644, - "OnnxBackendNodeModelTest.test_compress_negative_axis_cpu", - "OnnxBackendNodeModelTest.test_compress_default_axis_cpu", - "OnnxBackendNodeModelTest.test_compress_1_cpu", - "OnnxBackendNodeModelTest.test_compress_0_cpu"), (xfail_issue_38732, "OnnxBackendNodeModelTest.test_convinteger_with_padding_cpu", "OnnxBackendNodeModelTest.test_basic_convinteger_cpu"), @@ -374,6 +368,10 @@ tests_expected_to_fail = [ (xfail_issue_44956, "OnnxBackendNodeModelTest.test_loop11_cpu"), (xfail_issue_44957, + "OnnxBackendNodeModelTest.test_compress_0_cpu", + "OnnxBackendNodeModelTest.test_compress_1_cpu", + "OnnxBackendNodeModelTest.test_compress_default_axis_cpu", + "OnnxBackendNodeModelTest.test_compress_negative_axis_cpu", "OnnxBackendNodeModelTest.test_nonzero_example_cpu"), (xfail_issue_44958, "OnnxBackendNodeModelTest.test_upsample_nearest_cpu"), diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 
6d90dbc3f9c..dd7f23d04d2 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -439,7 +439,9 @@ endif() if (NGRAPH_ONNX_EDITOR_ENABLE) list(APPEND SRC onnx/onnx_editor.cpp) - list(APPEND MULTI_TEST_SRC onnx/onnx_test_utils.in.cpp) + list(APPEND MULTI_TEST_SRC + onnx/onnx_test_utils.in.cpp + onnx/onnx_import_with_editor.in.cpp) endif() add_clang_format_target(unit-test_clang FOR_SOURCES ${SRC} ${MULTI_TEST_SRC}) diff --git a/ngraph/test/models/onnx/compress_0.prototxt b/ngraph/test/models/onnx/compress_0.prototxt new file mode 100644 index 00000000000..d3e34c188bf --- /dev/null +++ b/ngraph/test/models/onnx/compress_0.prototxt @@ -0,0 +1,64 @@ +ir_version: 3 +producer_name: "backend-test" +graph { + node { + input: "input" + input: "condition" + output: "output" + op_type: "Compress" + attribute { + name: "axis" + i: 0 + type: INT + } + } + name: "test_compress_0" + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "condition" + type { + tensor_type { + elem_type: 9 + shape { + dim { + dim_value: 3 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/ngraph/test/models/onnx/compress_1.prototxt b/ngraph/test/models/onnx/compress_1.prototxt new file mode 100644 index 00000000000..e76930e5f79 --- /dev/null +++ b/ngraph/test/models/onnx/compress_1.prototxt @@ -0,0 +1,64 @@ +ir_version: 3 +producer_name: "backend-test" +graph { + node { + input: "input" + input: "condition" + output: "output" + op_type: "Compress" + attribute { + name: "axis" + i: 1 + type: INT + } + } + name: "test_compress_1" + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "condition" + type { + tensor_type { + elem_type: 9 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 1 + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/ngraph/test/models/onnx/compress_default_axis.prototxt b/ngraph/test/models/onnx/compress_default_axis.prototxt new file mode 100644 index 00000000000..825ebcb0d03 --- /dev/null +++ b/ngraph/test/models/onnx/compress_default_axis.prototxt @@ -0,0 +1,56 @@ +ir_version: 3 +producer_name: "backend-test" +graph { + node { + input: "input" + input: "condition" + output: "output" + op_type: "Compress" + } + name: "test_compress_default_axis" + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "condition" + type { + tensor_type { + elem_type: 9 + shape { + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 9 +} diff --git a/ngraph/test/models/onnx/compress_negative_axis.prototxt b/ngraph/test/models/onnx/compress_negative_axis.prototxt new file mode 100644 index 00000000000..51a05bc452f --- /dev/null +++ b/ngraph/test/models/onnx/compress_negative_axis.prototxt @@ -0,0 +1,64 @@ +ir_version: 6 +producer_name: "backend-test" +graph { + node { + input: "input" + input: "condition" + output: "output" + op_type: 
"Compress" + attribute { + name: "axis" + i: -1 + type: INT + } + } + name: "test_compress_negative_axis" + input { + name: "input" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "condition" + type { + tensor_type { + elem_type: 9 + shape { + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "output" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 1 + } + } + } + } + } +} +opset_import { + version: 11 +} diff --git a/ngraph/test/onnx/onnx_import_with_editor.in.cpp b/ngraph/test/onnx/onnx_import_with_editor.in.cpp new file mode 100644 index 00000000000..99ceef8011d --- /dev/null +++ b/ngraph/test/onnx/onnx_import_with_editor.in.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// clang-format off +#ifdef ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS +#define DEFAULT_FLOAT_TOLERANCE_BITS ${BACKEND_NAME}_FLOAT_TOLERANCE_BITS +#endif +#ifdef ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS +#define DEFAULT_DOUBLE_TOLERANCE_BITS ${BACKEND_NAME}_DOUBLE_TOLERANCE_BITS +#endif +// clang-format on + +#include "gtest/gtest.h" +#include "onnx_editor/editor.hpp" +#include "ngraph/ngraph.hpp" +#include "util/test_case.hpp" +#include "util/test_control.hpp" +#include "util/engine/test_engines.hpp" + +using namespace ngraph; + +static std::string s_manifest = "${MANIFEST}"; + +using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); + +// ############################################################################ CORE TESTS +NGRAPH_TEST(${BACKEND_NAME}, onnx_compress_axis_0) +{ + onnx_editor::ONNXModelEditor editor{ + file_util::path_join(SERIALIZED_ZOO, "onnx/compress_0.prototxt")}; + + std::map> in_vals; + + in_vals.emplace("input", op::Constant::create(element::f32, Shape{3, 2}, {1., 2., 3., 4., 5., 6.})); + in_vals.emplace("condition", op::Constant::create(element::boolean, Shape{3}, {false, true, true})); + editor.set_input_values(in_vals); + + const auto function = editor.get_function(); + auto test_case = test::TestCase(function); + + test_case.add_expected_output(Shape{2, 2}, {3., 4., 5., 6.}); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_compress_axis_1) +{ + onnx_editor::ONNXModelEditor editor{ + file_util::path_join(SERIALIZED_ZOO, "onnx/compress_1.prototxt")}; + + std::map> in_vals; + + in_vals.emplace("input", op::Constant::create(element::f32, Shape{3, 2}, {1., 2., 3., 4., 5., 6.})); + in_vals.emplace("condition", op::Constant::create(element::boolean, Shape{2}, {false, true})); + editor.set_input_values(in_vals); + + const auto function = editor.get_function(); + auto test_case = test::TestCase(function); + + test_case.add_expected_output(Shape{3, 1}, {2., 4., 6.}); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_compress_default_axis) +{ + onnx_editor::ONNXModelEditor editor{ + file_util::path_join(SERIALIZED_ZOO, "onnx/compress_default_axis.prototxt")}; + + std::map> in_vals; + + in_vals.emplace("input", op::Constant::create(element::f32, Shape{3, 2}, {1., 2., 3., 4., 5., 6.})); + in_vals.emplace("condition", op::Constant::create(element::boolean, Shape{5}, {false, true, false, false, true})); + editor.set_input_values(in_vals); + + const auto function = editor.get_function(); + auto test_case = test::TestCase(function); + + test_case.add_expected_output(Shape{2}, {2., 5.}); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_compress_negative_axis) +{ + 
onnx_editor::ONNXModelEditor editor{ + file_util::path_join(SERIALIZED_ZOO, "onnx/compress_negative_axis.prototxt")}; + + std::map> in_vals; + + in_vals.emplace("input", op::Constant::create(element::f32, Shape{3, 2}, {1., 2., 3., 4., 5., 6.})); + in_vals.emplace("condition", op::Constant::create(element::boolean, Shape{2}, {false, true})); + editor.set_input_values(in_vals); + + const auto function = editor.get_function(); + auto test_case = test::TestCase(function); + + test_case.add_expected_output(Shape{3, 1}, {2., 4., 6.}); + test_case.run(); +} From 449f3376e131f1a5ff21cec3dbe16ddaf28a8efe Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Thu, 29 Apr 2021 10:47:04 +0300 Subject: [PATCH 05/73] [Transformations] ShuffleChannelsFusion fix and tests added (#5448) --- .../shuffle_channels_fusion.cpp | 6 ++- .../shuffle_channels_fusion_test.cpp | 47 +++++++++++-------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/shuffle_channels_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/shuffle_channels_fusion.cpp index 054fb411f33..464761390a2 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/shuffle_channels_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/shuffle_channels_fusion.cpp @@ -55,7 +55,11 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ShuffleChannelsFusion, "ShuffleChannelsFusi ngraph::pass::ShuffleChannelsFusion::ShuffleChannelsFusion(const bool reshape_constants_check) { MATCHER_SCOPE(ShuffleChannelsFusion); - auto input = ngraph::pattern::any_input(pattern::has_static_shape()); + auto has_static_4d_shape = [](const Output& output) { + return pattern::has_static_shape()(output) && pattern::rank_equals(4)(output); + }; + + auto input = ngraph::pattern::any_input(has_static_4d_shape); auto reshape_before_const_pattern = ngraph::pattern::wrap_type(); auto transpose_const_pattern = ngraph::pattern::wrap_type(); auto reshape_after_const_pattern = ngraph::pattern::wrap_type(); diff --git a/inference-engine/tests/functional/inference_engine/transformations/shuffle_channels_fusion_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/shuffle_channels_fusion_test.cpp index f8e9c6ec259..db379df386f 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/shuffle_channels_fusion_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/shuffle_channels_fusion_test.cpp @@ -22,11 +22,10 @@ using namespace ngraph; class ShuffleChannelsFusionTestValues { public: - bool dynamicShape; + ngraph::PartialShape inputPartialShape; std::vector reshape_before_val; std::vector transpose_val; std::vector reshape_after_val; - size_t batch_size; bool check_reshape_values; bool fuse_happened; }; @@ -49,8 +48,7 @@ public: void SetUp() override { const auto values = GetParam(); { - const PartialShape inputPartialShape = values.dynamicShape ? 
PartialShape::dynamic() : Shape{ values.batch_size, 128, 720, 480 }; - auto input0 = std::make_shared(element::f32, inputPartialShape); + auto input0 = std::make_shared(element::f32, values.inputPartialShape); auto shape_reshape_before = opset6::Constant::create(element::i64, Shape{ values.reshape_before_val.size() }, values.reshape_before_val); auto permutation = opset6::Constant::create(element::i64, Shape{ values.transpose_val.size() }, values.transpose_val); auto shape_reshape_after = opset6::Constant::create(element::i64, Shape{ values.reshape_after_val.size() }, values.reshape_after_val); @@ -69,7 +67,7 @@ public: } if (values.fuse_happened) { - auto input0 = std::make_shared(ngraph::element::f32, ngraph::Shape{ values.batch_size, 128, 720, 480 }); + auto input0 = std::make_shared(ngraph::element::f32, values.inputPartialShape); auto shuffle_channels = std::make_shared(input0, 1, values.reshape_before_val[1]); f_ref = std::make_shared(ngraph::NodeVector{ shuffle_channels }, ngraph::ParameterVector{ input0 }); } else { @@ -81,10 +79,10 @@ public: const ShuffleChannelsFusionTestValues testValues = obj.param; std::ostringstream result; - if (testValues.dynamicShape) { + if (testValues.inputPartialShape.is_dynamic()) { result << "_dynamic_shape_"; } else { - result << "_batch_size_" << testValues.batch_size; + result << "_input_shape_" << testValues.inputPartialShape; } result << "_before_" << testValues.reshape_before_val @@ -105,19 +103,30 @@ TEST_P(ShuffleChannelsFusion, CompareFunctions) { } const std::vector testValues = { - { true, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, 128, 720, 480}, 1, false, false }, - { false, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, 128, 720, 480}, 1, false, true }, - { false, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, 128, 720, 480}, 1, true, true }, - { false, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, -1, 720, 480}, 1, false, true }, - { false, {4, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, -1, 720, 480}, 4, false, false }, - { false, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, -1, 720, 480}, 1, true, false }, - { true, {1, 4, 32, 720 * 480}, {0, 2, 1, 3}, {1, 128, 720, 480}, 1, false, false }, - { false, {1, 4, 32, 720 * 480}, {0, 2, 1, 3}, {1, 128, 720, 480}, 1, false, true }, - { false, {1, 2, 64, 720 * 480}, {0, 2, 1, 3}, {1, 128, 720, 480}, 1, true, true }, - { false, {1, 2, 64, 720 * 480}, {0, 2, 1, 3}, {1, -1, 720, 480}, 1, false, true }, - { false, {4, 2, 64, 720 * 480}, {0, 2, 1, 3}, {1, -1, 720, 480}, 4, false, false }, - { false, {1, 2, 64, 720 * 480}, {0, 2, 1, 3}, {1, -1, 720, 480}, 1, true, false }, + // dynamic shape + { ngraph::PartialShape::dynamic(), {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, 128, 720, 480}, false, false }, + { ngraph::PartialShape::dynamic(), {1, 4, 32, 720 * 480}, {0, 2, 1, 3}, {1, 128, 720, 480}, false, false }, + + // 4D, batch_size = 1, 4D reshape constant + { {1, 128, 720, 480}, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, 128, 720, 480}, false, true }, + { {1, 128, 720, 480}, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, 128, 720, 480}, true, true }, + { {1, 128, 720, 480}, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, -1, 720, 480}, false, true }, + { {1, 128, 720, 480}, {1, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, -1, 720, 480}, true, false }, + + // 4D, batch_size = 1, 3D reshape constant + { {1, 128, 720, 480}, {1, 4, 32, 720 * 480}, {0, 2, 1, 3}, {1, 128, 720, 480}, false, true }, + { {1, 128, 720, 480}, {1, 2, 64, 720 * 480}, {0, 2, 1, 3}, {1, 128, 720, 480}, true, true }, + { {1, 128, 720, 480}, {1, 2, 64, 720 * 
480}, {0, 2, 1, 3}, {1, -1, 720, 480}, false, true }, + { {1, 128, 720, 480}, {1, 2, 64, 720 * 480}, {0, 2, 1, 3}, {1, -1, 720, 480}, true, false }, + + // 4D, batch_size = 4 + { {4, 128, 720, 480}, {4, 2, 64, 720, 480}, {0, 2, 1, 3, 4}, {1, -1, 720, 480}, false, false }, + { {4, 128, 720, 480}, {4, 2, 64, 720 * 480}, {0, 2, 1, 3}, {1, -1, 720, 480}, false, false }, + + // 2D + { {128, 720 * 480}, {1, 2, 64, 720 * 480}, {0, 2, 1, 3}, {1, -1, 720, 480}, false, false }, }; + INSTANTIATE_TEST_CASE_P( TransformationTests, ShuffleChannelsFusion, From 2fb6b46e5a5578f460ac3d4253cdf8f44b5c9a93 Mon Sep 17 00:00:00 2001 From: Krzysztof Bruniecki Date: Thu, 29 Apr 2021 09:49:00 +0200 Subject: [PATCH 06/73] [GNA] Add limiatations for CNN2D fused layer (#5334) * Add limiatations for input HWC kernel HW and output C * Fix single layer tests * Add limitations for pooling 2d, adjust tests to new limitations * Disable input padding * Apply review * Apply review 2 * Fix undefined type on legacy API * Fix centos * fix cpplint * Simplify --- .../src/gna_plugin/backend/dnn_types.h | 2 +- .../gna_plugin/backend/gna_limitations.cpp | 128 ++++++++++++++++++ .../gna_plugin/backend/gna_limitations.hpp | 75 +++++++++- .../src/gna_plugin/gna_graph_compiler.cpp | 20 ++- .../src/gna_plugin/gna_graph_compiler.hpp | 3 + .../src/gna_plugin/gna_plugin_policy.hpp | 2 +- .../single_layer_tests/convolution.cpp | 14 +- .../convolution_relu_sequence.cpp | 69 ++++++++-- 8 files changed, 287 insertions(+), 26 deletions(-) create mode 100644 inference-engine/src/gna_plugin/backend/gna_limitations.cpp diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h b/inference-engine/src/gna_plugin/backend/dnn_types.h index 02e009d7841..223fb224203 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_types.h +++ b/inference-engine/src/gna_plugin/backend/dnn_types.h @@ -197,7 +197,6 @@ typedef struct { uint32_t num_copy_rows; // number of rows to copy } intel_copy_t; -#if GNA_LIB_VER == 2 enum OvGnaType { OvGnaTypeInt8 = 1, OvGnaTypeInt16 = 2, @@ -205,6 +204,7 @@ enum OvGnaType { OvGnaTypePwl = 8, }; +#if GNA_LIB_VER == 2 enum OvGnaMode { OvGnaModeDefault = 0, OvGnaModeDisabled = -1 diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.cpp b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp new file mode 100644 index 00000000000..98257eb3687 --- /dev/null +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.cpp @@ -0,0 +1,128 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gna_limitations.hpp" + +#include + +using GNAPluginNS::GNALimitations::Cnn2D::Validator; +using GNAPluginNS::GNALimitations::Cnn2D::VectorOrSquareLimit; +using GNAPluginNS::GNALimitations::Cnn2D::VectorOrSquareLimitByChannels; +using GNAPluginNS::GNALimitations::Cnn2D::VectorOrSquareLimitByChannelsAndPrecision; +using GNAPluginNS::GNALimitations::Cnn2D::RangeLimit; +using GNAPluginNS::GNALimitations::Cnn2D::RangeLimit2D; +using GNAPluginNS::GNALimitations::Cnn2D::RangeMultipleLimit; + +bool RangeLimit::isValid(const uint32_t val) const { + return val >= min && val <= max; +} + +std::string RangeLimit::GetErrorOrEmpty(const uint32_t val) const { + std::ostringstream out; + if (!isValid(val)) { + out << "Unsupported " << what << ", actual value: " << val << ", valid range [" << min << ", " << max << "]\n"; + } + return out.str(); +} + +bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const { + return hLimit.isValid(h) && wLimit.isValid(w); +} + +std::string 
+    return hLimit.GetErrorOrEmpty(h) + wLimit.GetErrorOrEmpty(w);
+}
+
+RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn) : RangeLimit(rlIn), multiplier(multiplierIn) {
+}
+
+bool RangeMultipleLimit::isValid(const uint32_t val) const {
+    return RangeLimit::isValid(val) && (val % multiplier == 0);
+}
+
+std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
+    auto e = RangeLimit::GetErrorOrEmpty(val);
+    std::ostringstream out;
+    if (!isValid(val)) {
+        out << "Unsupported " << what << ": " << val << ", must be multiple of " << multiplier << "\n";
+    }
+    return e + out.str();
+}
+
+bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
+    if (w == 1 && h >= 1 && h <= maxVectorHeight) return true;
+    if (h == 1 && w >= 1 && w <= maxVectorWidth) return true;
+    if (h == w && h <= maxSquare && h >= 1) return true;
+    return false;
+}
+
+std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
+    std::ostringstream out;
+    if (!isValid(h, w)) {
+        out << "Unsupported " << what << " shape, actual WxH: " << w << "x" << h <<
+            ", only vertical vector up to 1x" << maxVectorHeight << ", horizontal up to " << maxVectorWidth <<
+            "x1 or square up to " << maxSquare << "x" << maxSquare << " are valid\n";
+    }
+    return out.str();
+}
+
+VectorOrSquareLimit VectorOrSquareLimitByChannels::GetByChannels(const uint32_t channels) const {
+    return channels <= smallChannelMax ? smallChannel : bigChannel;
+}
+
+bool VectorOrSquareLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
+    return GetByChannels(channels).isValid(h, w);
+}
+
+std::string VectorOrSquareLimitByChannels::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
+    const uint32_t channels, std::string what) const {
+    return GetByChannels(channels).GetErrorOrEmpty(h, w, what);
+}
+
+VectorOrSquareLimitByChannels VectorOrSquareLimitByChannelsAndPrecision::GetByPrecision(const OvGnaType precision) const {
+    return precision == OvGnaTypeInt8 ?
lowPrecision : defaultPrecision; +} + +bool VectorOrSquareLimitByChannelsAndPrecision::isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const { + return GetByPrecision(precision).isValid(h, w, channels); +} + +std::string VectorOrSquareLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, const uint32_t w, + const OvGnaType precision, const uint32_t channels, std::string what) const { + return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what); +} + +void Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, + const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, + OvGnaType inPrecision) const { + const std::string prefix = "Layer Convolution2D: " + name + ":"; + auto error = inputHWLimit.GetErrorOrEmpty(inHeight, inWidth); + + error += kernelNumberLimit.GetErrorOrEmpty(kN); + + error += inputChannelsNumberLimit.GetErrorOrEmpty(inChannels); + error += kernelLimit.GetErrorOrEmpty(kH, kW, inPrecision, inChannels, "kernel"); + ThrowIfNotEmpty(prefix, error); +} + +void Validator::ValidatePooling2D(std::string name, + const uint32_t windowH, const uint32_t windowW, + const uint32_t strideH, const uint32_t strideW) const { + const std::string prefix = "Layer Pooling2D: " + name + ":"; + + auto error = poolingWindowLimit.GetErrorOrEmpty(windowH, windowW, "pooling window"); + const RangeLimit poolingStrideHLimit{ 1, windowH, "pooling stride height (must be up to pooling window height)" }; + const RangeLimit poolingStrideWLimit{ 1, windowW, "pooling stride width (must be up to pooling window width)" }; + + error += poolingStrideHLimit.GetErrorOrEmpty(strideH); + error += poolingStrideWLimit.GetErrorOrEmpty(strideW); + + ThrowIfNotEmpty(prefix, error); +} + +void Validator::ThrowIfNotEmpty(const std::string prefix, const std::string error) { + if (!error.empty()) { + THROW_GNA_EXCEPTION << prefix << error; + } +} diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp index 97539736283..40e256042aa 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp @@ -4,6 +4,7 @@ #pragma once +#include "dnn_types.h" #include namespace GNAPluginNS { @@ -16,5 +17,77 @@ constexpr uint32_t convEachKernelByteAlignment = 16; constexpr uint32_t noOfInputsDivisor = 8; constexpr uint32_t noOfInputsLowPrecDivisor = 16; -} +namespace Cnn2D { +struct RangeLimit { + uint32_t min; + uint32_t max; + std::string what; + bool isValid(const uint32_t val) const; + std::string GetErrorOrEmpty(const uint32_t val) const; +}; + +struct RangeLimit2D { + RangeLimit hLimit; + RangeLimit wLimit; + bool isValid(const uint32_t h, const uint32_t w) const; + std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w) const; +}; + +struct RangeMultipleLimit : public RangeLimit { + uint32_t multiplier; + RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn); + bool isValid(const uint32_t val) const; + std::string GetErrorOrEmpty(const uint32_t val) const; +}; + +struct VectorOrSquareLimit { + uint32_t maxSquare; + uint32_t maxVectorHeight; + uint32_t maxVectorWidth; + bool isValid(const uint32_t h, const uint32_t w) const; + std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const; +}; + +struct VectorOrSquareLimitByChannels { + uint32_t smallChannelMax; + VectorOrSquareLimit smallChannel; + 
VectorOrSquareLimit bigChannel; + VectorOrSquareLimit GetByChannels(const uint32_t channels) const; + bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const; + std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, + const uint32_t channels, std::string what) const; +}; + +struct VectorOrSquareLimitByChannelsAndPrecision { + VectorOrSquareLimitByChannels lowPrecision; + VectorOrSquareLimitByChannels defaultPrecision; + VectorOrSquareLimitByChannels GetByPrecision(const OvGnaType precision) const; + bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const; + std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, + const OvGnaType precision, const uint32_t channels, std::string what) const; +}; + +class Validator { + RangeLimit2D inputHWLimit{ { 16, 384, "input height"} , { 16, 240, "input width"} }; + RangeMultipleLimit inputChannelsNumberLimit{ {8, 384, "number of input channels"}, 8 }; + + RangeMultipleLimit kernelNumberLimit{ {8, 256, "number of kernels"}, 8 }; + VectorOrSquareLimitByChannelsAndPrecision kernelLimit { + { 240, { 3, 7, 3 }, { 2, 7, 2 } }, + { 120, { 3, 7, 3 }, { 1, 7, 1 } } }; + + const VectorOrSquareLimit poolingWindowLimit{ 3, 1, 1 }; + + static void ThrowIfNotEmpty(const std::string prefix, const std::string error); +public: + void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, + const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, + OvGnaType inPrecision) const; + + void ValidatePooling2D(std::string name, + const uint32_t windowH, const uint32_t windowW, + const uint32_t strideH, const uint32_t strideW) const; +}; +} // namespace Cnn2D +} // namespace GNALimitations } // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index c6324485cf7..4dee09ea498 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -575,7 +575,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP } uint32_t num_feature_map_rows = (in_channels * in_height * in_width) / num_feature_map_columns; - uint32_t filter_n = convolution._out_depth; + const uint32_t filter_n = convolution._out_depth; uint32_t original_num_feature_map_rows = num_feature_map_rows; // if kernel padding to multiple of 8 will cause missed outputs, need to pad further @@ -598,6 +598,9 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP const auto weightPrec = OvGnaTypeIntFromBytes(convolution._weights->getTensorDesc().getPrecision().size()); const auto biasPrec = OvGnaTypeIntFromBytes(biasPrecision.size()); + cnn2dValidator.ValidateCnn2D(layer->name, + in_height, in_width, in_channels, + convolution._kernel_y, convolution._kernel_x, filter_n, inputPrec); float weight_scale_factor = 1.0f; float output_scale_factor = 1.0f; @@ -859,6 +862,21 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { void* ptr_inputs = nullptr; void* ptr_outputs = nullptr; + bool is2DPooling = false; + if (dnnComponents.components.size() > 0) { + const auto last = dnnComponents.components.back(); + if (last.dnnComponent.operation == kDnnConvolutional2dOp) { + is2DPooling = true; + } else if (last.dnnComponent.operation == kDnnPiecewiselinearOp && dnnComponents.components.size() > 1) { + const auto& prev2 = 
*std::prev(dnnComponents.components.cend(), 2); + is2DPooling = prev2.dnnComponent.operation == kDnnConvolutional2dOp; + } + } + + if (is2DPooling) { + cnn2dValidator.ValidatePooling2D(layer->name, pooling._kernel_y, pooling._kernel_x, pooling._stride_y, pooling._stride_x); + } + auto& currentComponent = dnnComponents.addComponent(layer->name, "pooling"); switch (pooling._type) { diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.hpp b/inference-engine/src/gna_plugin/gna_graph_compiler.hpp index a7099d0d1f6..27246b443aa 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.hpp @@ -24,6 +24,7 @@ #include "layers/gna_split_layer.hpp" #include "backend/dnn_components.hpp" #include "backend/am_intel_dnn.hpp" +#include "backend/gna_limitations.hpp" #include "gna_device.hpp" #include "gna_data_types.hpp" #include "gna_plugin_policy.hpp" @@ -51,6 +52,8 @@ private: static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&); std::vector static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols); + const GNALimitations::Cnn2D::Validator cnn2dValidator; + public: GNAPluginNS::backend::DnnComponents dnnComponents; MemoryConnection memory_connection; diff --git a/inference-engine/src/gna_plugin/gna_plugin_policy.hpp b/inference-engine/src/gna_plugin/gna_plugin_policy.hpp index b14917a4061..0611ca0993c 100644 --- a/inference-engine/src/gna_plugin/gna_plugin_policy.hpp +++ b/inference-engine/src/gna_plugin/gna_plugin_policy.hpp @@ -68,7 +68,7 @@ class Policy { uint32_t limitedTo = LIMITED_TO_DEFAULT_GNA2_65536; } GNAAffineDiagonalPolicy; - bool cnn2dInputPaddingSupported = true; + bool cnn2dInputPaddingSupported = false; }; inline std::ostream& operator<<(std::ostream& os, Policy::ScaleShift policy) { diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp index 307b6a1271b..79151f34c3b 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp @@ -66,13 +66,13 @@ const std::vector> inputShapesW1 = {{1, 1, 32, 1}, const std::vector numOutCannels = {4, 8, 12}; const std::vector> kernels2D = { + {5, 1}, {4, 1}, - {1, 4}, + {1, 3}, + {1, 2}, {2, 2}, - {2, 3}, - {3, 2}, - // {4, 2}, TODO: fix sporadic accuracy failures, see issue 45303 - // {3, 3}, TODO: fix sporadic accuracy failures, see issue 45303 + // {7, 1}, TODO: fix accuracy failures, see issue 54140 + // {3, 3}, TODO: fix accuracy failures, see issue 54140 }; const std::vector> strides2D = { {1, 1}, @@ -83,9 +83,9 @@ const std::vector> padEnds2D = { {0, 0}, }; const std::vector> dilations2D = { {1, 1}, }; -const std::vector numOutCannels2D = { 1, 2, 5 }; +const std::vector numOutCannels2D = { 8, 16, 32}; -const std::vector input2DNCHW = { 1, 2, 20, 15 }; +const std::vector input2DNCHW = { 1, 8, 20, 16 }; const std::vector> inputShapesMapTo1d = {{1, 1, 56, 5}, {1, 32, 56, 5}, diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/convolution_relu_sequence.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/convolution_relu_sequence.cpp index 5c0b1afd882..7600e34a7fb 100644 --- 
a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/convolution_relu_sequence.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/convolution_relu_sequence.cpp @@ -40,11 +40,7 @@ const std::vector netPrecisions = { }; const std::vector inputShapeSimple = { - {1, 32, 64, 16}, -}; - -const std::vector inputShapeSimpleWithPooling = { - {1, 32, 128, 32}, + {1, 32, 64, 32}, }; const std::vector convReluSpecificParamsSimpleSeq { @@ -53,30 +49,34 @@ const std::vector convReluSpecificParamsSimpleSeq { {2, 2}, // Stride {0, 0}, // Pad begin {0, 0}, // Pad end - 3, // Num out channels + 16, // Num out channels {1, 1}, //Pooling window {1, 1} //Pooling stride }, { - {2, 5}, // Kernel size - {2, 3}, // Stride + {2, 1}, // Kernel size + {2, 1}, // Stride {0, 0}, // Pad begin {0, 0}, // Pad end - 8, // Num out channels + 8, // Num out channels {1, 1}, //Pooling window {1, 1} //Pooling stride }, }; +const std::vector inputShapeSimpleWithPooling = { + {1, 32, 53, 110}, +}; + const std::vector convReluSpecificParamsSimpleSeqWithPooling { { {3, 3}, // Kernel size {1, 1}, // Stride {0, 0}, // Pad begin {0, 0}, // Pad end - 3, // Num out channels - {2, 3}, //Pooling window - {2, 3} //Pooling stride + 16, // Num out channels + {3, 3}, //Pooling window + {3, 3} //Pooling stride }, { {2, 2}, // Kernel size @@ -84,7 +84,7 @@ const std::vector convReluSpecificParamsSimpleSeqWithPoo {0, 0}, // Pad begin {0, 0}, // Pad end 8, // Num out channels - {2, 3}, //Pooling window + {2, 2}, //Pooling window {2, 2} //Pooling stride }, }; @@ -123,15 +123,54 @@ const std::vector convReluSpecificParamsFBSeq = { }, }; +const InferenceEngine::SizeVector inputShape3 = { + {1, 8, 18, 54}, +}; + +const std::vector convReluSpecificParams3Seq = { + { + {1, 3}, // Kernel size + {1, 1}, // Stride + {0, 0}, // Pad begin + {0, 0}, // Pad end + 32, // Num out channels + {1, 1}, //Pooling window + {1, 1} //Pooling stride + }, + { + {2, 1}, // Kernel size + {1, 1}, // Stride + {0, 0}, // Pad begin + {0, 0}, // Pad end + 8, // Num out channels + {1, 1}, //Pooling window + {1, 1} //Pooling stride + }, + { + {3, 3}, // Kernel size + {3, 3}, // Stride + {0, 0}, // Pad begin + {0, 0}, // Pad end + 8, // Num out channels + {3, 3}, //Pooling window + {3, 3} //Pooling stride + }, +}; + const std::vector convReluSpecificParamsAllAll = { { inputShapeSimple, convReluSpecificParamsSimpleSeq }, { - inputShapeFB, - convReluSpecificParamsFBSeq + inputShape3, + convReluSpecificParams3Seq }, + // Enable when bigger kernels (e.g., 5x7, 9x5) and input padding supported + // { + // inputShapeFB, + // convReluSpecificParamsFBSeq + // }, { inputShapeSimpleWithPooling, convReluSpecificParamsSimpleSeqWithPooling From 18fdc4b69490897a175829a3f5852bbefa08e4b8 Mon Sep 17 00:00:00 2001 From: Anton Voronov Date: Thu, 29 Apr 2021 11:41:33 +0300 Subject: [PATCH 07/73] [CPU] fixed SIGSEGV when moving unaligned data in Quantize node (Issue 51255) (#5262) --- .../nodes/mkldnn_quantize_node.cpp | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp index 6af113381ed..a48187adecc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp @@ -1014,21 +1014,36 @@ void MKLDNNQuantizeNode::init() { bool quantizationOnly = true; - for (int i = 0; i < 
cropLow.size(); i++) { - float il = inputLowData[isInputLowBroadcasted ? 0 : i]; + // WA: Depending on the hardware, the compiler may enable data movement optimizations that require working with aligned data. + // If we receive unaligned data, then working with them can lead to segfault. To avoid this situation, we added a function + // that pushes data into aligned memory. + auto prepareAlignedData = [](float *newData, const float *oldData, int dataSize) { + memcpy(newData, oldData, dataSize * sizeof(float)); + }; - cropLow[i] = il; + std::vector inputLowDataAligned(inputLowAxisSize); + prepareAlignedData(&inputLowDataAligned[0], inputLowData, inputLowDataAligned.size()); + + std::vector inputHighDataAligned(inputHighAxisSize); + prepareAlignedData(&inputHighDataAligned[0], inputHighData, inputHighDataAligned.size()); + + std::vector outputLowDataAligned(outputLowAxisSize); + prepareAlignedData(&outputLowDataAligned[0], outputLowData, outputLowDataAligned.size()); + + std::vector outputHighDataAligned(outputHighAxisSize); + prepareAlignedData(&outputHighDataAligned[0], outputHighData, outputHighDataAligned.size()); + + for (int i = 0; i < cropLow.size(); i++) { + cropLow[i] = inputLowDataAligned[isInputLowBroadcasted ? 0 : i]; } for (int i = 0; i < cropHigh.size(); i++) { - float ih = inputHighData[isInputHighBroadcasted ? 0 : i]; - - cropHigh[i] = ih; + cropHigh[i] = inputHighDataAligned[isInputHighBroadcasted ? 0 : i]; } for (int i = 0; i < inputScale.size(); i++) { - float il = inputLowData[isInputLowBroadcasted ? 0 : i]; - float ih = inputHighData[isInputHighBroadcasted ? 0 : i]; + float il = inputLowDataAligned[isInputLowBroadcasted ? 0 : i]; + float ih = inputHighDataAligned[isInputHighBroadcasted ? 0 : i]; #if defined(VALIDATE_QUANTIZATION_RANGES) if ((il == ih && levels != 2) || il > ih || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) { @@ -1042,8 +1057,8 @@ void MKLDNNQuantizeNode::init() { } for (int i = 0; i < outputScale.size(); i++) { - float ol = outputLowData[isOutputLowBroadcasted ? 0 : i]; - float oh = outputHighData[isOutputHighBroadcasted ? 0 : i]; + float ol = outputLowDataAligned[isOutputLowBroadcasted ? 0 : i]; + float oh = outputHighDataAligned[isOutputHighBroadcasted ? 0 : i]; #if defined(VALIDATE_QUANTIZATION_RANGES) if (std::isnan(ol) || std::isnan(oh) || std::isinf(ol) || std::isinf(oh)) { @@ -1059,7 +1074,7 @@ void MKLDNNQuantizeNode::init() { } for (int i = 0; i < outputShift.size(); i++) { - float ol = outputLowData[isOutputLowBroadcasted ? 0 : i]; + float ol = outputLowDataAligned[isOutputLowBroadcasted ? 
0 : i]; outputShift[i] = ol; From 768aec9d92955e6ea847627ab12928e7bcfadba2 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 29 Apr 2021 12:53:14 +0300 Subject: [PATCH 08/73] Trying to fix MSVC compilation (#5453) --- .../openvino/inference_engine/CMakeLists.txt | 2 -- .../openvino/inference_engine/ie_api_impl.cpp | 6 ++++ .../openvino/inference_engine/ie_api_impl.hpp | 2 ++ .../cpp/ie_executable_network.cpp | 29 +++++++++---------- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index a74ae6d46c7..d90042d73b3 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -18,8 +18,6 @@ set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX ON) # create target -disable_deprecated_warnings() - cython_add_module(${TARGET_NAME} ${SOURCE}) set(INSTALLED_TARGETS ${TARGET_NAME}) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index 8244dfcd19d..9e90bb7de44 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp @@ -322,6 +322,8 @@ std::map InferenceEnginePython::IEExecNe return pyOutputs; } +IE_SUPPRESS_DEPRECATED_START + void InferenceEnginePython::InferRequestWrap::setBlob(const std::string& blob_name, const InferenceEngine::Blob::Ptr& blob_ptr) { InferenceEngine::ResponseDesc response; IE_CHECK_CALL(request_ptr->SetBlob(blob_name.c_str(), blob_ptr, &response)); @@ -427,6 +429,8 @@ std::map InferenceEnginePython: return perf_map; } +IE_SUPPRESS_DEPRECATED_END + std::string InferenceEnginePython::get_version() { auto version = InferenceEngine::GetInferenceEngineVersion(); std::string version_str = std::to_string(version->apiVersion.major) + "."; @@ -487,6 +491,7 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests) } infer_requests.resize(num_requests); InferenceEngine::ResponseDesc response; + IE_SUPPRESS_DEPRECATED_START for (size_t i = 0; i < num_requests; ++i) { InferRequestWrap& infer_request = infer_requests[i]; infer_request.index = i; @@ -496,6 +501,7 @@ void InferenceEnginePython::IEExecNetwork::createInferRequests(int num_requests) IE_CHECK_CALL(infer_request.request_ptr->SetUserData(&infer_request, &response)); infer_request.request_ptr->SetCompletionCallback(latency_callback); } + IE_SUPPRESS_DEPRECATED_END } InferenceEnginePython::IENetwork InferenceEnginePython::IECore::readNetwork(const std::string& modelPath, const std::string& binPath) { diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp index 193413625d7..8fdf56b5b82 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp @@ -94,7 +94,9 @@ struct InferRequestWrap { int index; using cy_callback = void (*)(void*, int); + IE_SUPPRESS_DEPRECATED_START InferenceEngine::IInferRequest::Ptr request_ptr; + IE_SUPPRESS_DEPRECATED_END Time::time_point start_time; double exec_time; cy_callback user_callback; 
diff --git a/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp b/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp
index 45d66da3403..be2871da6a8 100644
--- a/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp
+++ b/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp
@@ -50,6 +50,19 @@ void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) {
     this->_impl.swap(newImpl);
 }
 
+ExecutableNetwork::operator IExecutableNetwork::Ptr() {
+    return std::make_shared(_impl);
+}
+
+std::vector ExecutableNetwork::QueryState() {
+    std::vector controller;
+    EXEC_NET_CALL_STATEMENT(
+        for (auto&& state : _impl->QueryState()) {
+            controller.emplace_back(std::make_shared(state), _so);
+        });
+    return controller;
+}
+
 IE_SUPPRESS_DEPRECATED_END
 
 InferRequest ExecutableNetwork::CreateInferRequest() {
@@ -68,26 +81,10 @@ void ExecutableNetwork::Export(std::ostream& networkModel) {
     EXEC_NET_CALL_STATEMENT(return _impl->Export(networkModel));
 }
 
-ExecutableNetwork::operator IExecutableNetwork::Ptr() {
-    return std::make_shared(_impl);
-}
-
 CNNNetwork ExecutableNetwork::GetExecGraphInfo() {
-    IE_SUPPRESS_DEPRECATED_START
     EXEC_NET_CALL_STATEMENT(return _impl->GetExecGraphInfo());
 }
 
-IE_SUPPRESS_DEPRECATED_START
-std::vector ExecutableNetwork::QueryState() {
-    std::vector controller;
-    EXEC_NET_CALL_STATEMENT(
-        for (auto&& state : _impl->QueryState()) {
-            controller.emplace_back(std::make_shared(state), _so);
-        });
-    return controller;
-}
-IE_SUPPRESS_DEPRECATED_END
-
 void ExecutableNetwork::SetConfig(const std::map& config) {
     EXEC_NET_CALL_STATEMENT(_impl->SetConfig(config));
 }

From b1a4a73328e132043878fe847b057d0450aa3d55 Mon Sep 17 00:00:00 2001
From: Anna Khakimova
Date: Thu, 29 Apr 2021 13:38:28 +0300
Subject: [PATCH 09/73] Pre-processing: Adding DivC and SubC kernels. (#5364)

* [PP] Fluid level tests for mean value preprocessing
* [PP] Fluid operations for mean value preprocessing
* * Relaxed tolerance and fix for issue.
* * Fix for issue.
* * Applied comments.
Co-authored-by: Anton Potapov --- .../ie_preprocess_gapi_kernels.cpp | 54 ++++++++ .../ie_preprocess_gapi_kernels.hpp | 11 +- .../ie_preprocess_gapi_kernels_impl.hpp | 2 + .../fluid_preproc/common/fluid_tests.cpp | 82 ++++++++++++ .../fluid_preproc/common/fluid_tests.hpp | 12 ++ .../fluid_preproc/cpu/fluid_tests_cpu.cpp | 18 +++ .../fluid_test_computations.cpp | 125 +++++++++++++++--- .../fluid_test_computations.hpp | 24 ++++ 8 files changed, 308 insertions(+), 20 deletions(-) diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp index 2b4c53f4d9a..0dd374bd3ff 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp @@ -2461,6 +2461,58 @@ GAPI_FLUID_KERNEL(FConvertDepth, ConvertDepth, false) { } }; +namespace { + template + void sub(const uint8_t* src, uint8_t* dst, const int width, double c) { + const auto *in = reinterpret_cast(src); + auto *out = reinterpret_cast(dst); + + for (int i = 0; i < width; i++) { + out[i] = saturate_cast(in[i] - c); + } + } + + template + void div(const uint8_t* src, uint8_t* dst, const int width, double c) { + const auto *in = reinterpret_cast(src); + auto *out = reinterpret_cast(dst); + + for (int i = 0; i < width; i++) { + out[i] = saturate_cast(in[i] / c); + } + } +} // namespace + +GAPI_FLUID_KERNEL(FSubC, GSubC, false) { + static const int Window = 1; + + static void run(const cv::gapi::fluid::View& src, const cv::Scalar &scalar, int depth, cv::gapi::fluid::Buffer& dst) { + GAPI_Assert(src.meta().depth == CV_32F && src.meta().chan == 1); + + const auto *in = src.InLineB(0); + auto *out = dst.OutLineB(); + + auto const width = dst.length(); + + sub(in, out, width, scalar[0]); + } +}; + +GAPI_FLUID_KERNEL(FDivC, GDivC, false) { + static const int Window = 1; + + static void run(const cv::gapi::fluid::View &src, const cv::Scalar &scalar, double _scale, int /*dtype*/, + cv::gapi::fluid::Buffer &dst) { + GAPI_Assert(src.meta().depth == CV_32F && src.meta().chan == 1); + + const auto *in = src.InLineB(0); + auto *out = dst.OutLineB(); + + auto const width = dst.length(); + + div(in, out, width, scalar[0]); + } +}; } // namespace kernels //---------------------------------------------------------------------- @@ -2488,6 +2540,8 @@ cv::gapi::GKernelPackage preprocKernels() { , FNV12toRGB , FI420toRGB , FConvertDepth + , FSubC + , FDivC >(); } diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp index b9f78156b86..a2f82fb5d9e 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp @@ -152,8 +152,17 @@ namespace gapi { } }; + G_TYPED_KERNEL(GSubC, , "com.intel.ie.math.subC") { + static cv::GMatDesc outMeta(cv::GMatDesc a, cv::GScalarDesc, int ddepth) { + return a.withDepth(ddepth); + } + }; - + G_TYPED_KERNEL(GDivC, , "com.intel.ie.math.divC") { + static cv::GMatDesc outMeta(cv::GMatDesc a, cv::GScalarDesc, double, int ddepth) { + return a.withDepth(ddepth); + } + }; cv::gapi::GKernelPackage preprocKernels(); } // namespace gapi diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_impl.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_impl.hpp index 26ef353d4a7..452a29b2e2d 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_impl.hpp 
+++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_impl.hpp @@ -57,6 +57,8 @@ template<> inline uint8_t saturate_cast(uint16_t x) { std::max(static_cast(lim::min()), x)); } template<> inline uint8_t saturate_cast(float x) { return saturate_cast(static_cast(std::rint(x))); } + +template<> inline float saturate_cast(double x) { return x; } //------------------------------------------------------------------------------ constexpr static const int ONE = 1 << 15; diff --git a/inference-engine/tests_deprecated/fluid_preproc/common/fluid_tests.cpp b/inference-engine/tests_deprecated/fluid_preproc/common/fluid_tests.cpp index bc25cfa969a..c7300214c7b 100644 --- a/inference-engine/tests_deprecated/fluid_preproc/common/fluid_tests.cpp +++ b/inference-engine/tests_deprecated/fluid_preproc/common/fluid_tests.cpp @@ -142,6 +142,7 @@ std::vector to_test(std::vector& mats) } test::Rect to_test(cv::Rect& rect) { return {rect.x, rect.y, rect.width, rect.height}; } +test::Scalar to_test(cv::Scalar const& sc) { return {sc[0], sc[1], sc[2], sc[3]}; } cv::ColorConversionCodes toCvtColorCode(InferenceEngine::ColorFormat in, InferenceEngine::ColorFormat out) { @@ -678,6 +679,61 @@ TEST_P(ConvertDepthTestGAPI, AccuracyTest) EXPECT_LE(cv::norm(out_mat_ocv, out_mat_gapi, cv::NORM_INF), tolerance); } } + +TEST_P(DivCTestGAPI, AccuracyTest) +{ + const auto params = GetParam(); + const int in_depth = std::get<0>(params); + const int in_channels = std::get<1>(params); + const cv::Size sz = std::get<2>(params); + const cv::Scalar C = std::get<3>(params); + double tolerance = std::get<4>(params); + + const int in_type = CV_MAKETYPE(in_depth,in_channels); + + initMatrixRandU(in_type, sz, in_type); + + // G-API code + DivCComputation cc(to_test(in_mat1), to_test(out_mat_gapi), to_test(C)); + cc.warmUp(); + + // OpenCV code ///////////////////////////////////////////////////////////// + { + out_mat_ocv = in_mat1 / C; + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_LE(cv::norm(out_mat_ocv, out_mat_gapi, cv::NORM_INF), tolerance); + } +} + +TEST_P(SubCTestGAPI, AccuracyTest) +{ + const auto params = GetParam(); + const int in_depth = std::get<0>(params); + const int in_channels = std::get<1>(params); + const cv::Size sz = std::get<2>(params); + const cv::Scalar C = std::get<3>(params); + const double tolerance = std::get<4>(params); + + const int in_type = CV_MAKETYPE(in_depth,in_channels); + + initMatrixRandU(in_type, sz, in_type); + + // G-API code + SubCComputation cc(to_test(in_mat1), to_test(out_mat_gapi), to_test(C)); + cc.warmUp(); + + // OpenCV code ///////////////////////////////////////////////////////////// + { + out_mat_ocv = in_mat1 - C; + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_LE(cv::norm(out_mat_ocv, out_mat_gapi, cv::NORM_INF), tolerance); + } +} + //---------------------------------------------------------------------- TEST_P(ResizeTestIE, AccuracyTest) @@ -1268,5 +1324,31 @@ TEST_P(PreprocTest, Performance) out_layout_str.c_str(), out_size.width, out_size.height, colorFormatToString(in_fmt).c_str(), colorFormatToString(out_fmt).c_str()); #endif // PERF_TEST +} + +TEST_P(MeanValueGAPI, AccuracyTest) +{ + const auto params = GetParam(); + cv::Size sz = std::get<0>(params); + double tolerance = std::get<1>(params); + + initMatrixRandU(CV_32FC1, sz, CV_32FC1); + + const cv::Scalar mean = { 0.485, 0.456, 0.406 }; + const cv::Scalar std = { 0.229, 0.224, 0.225 }; + + // G-API code + 
MeanValueSubtractComputation cc(to_test(in_mat1), to_test(out_mat_gapi), to_test(mean), to_test(std)); + cc.warmUp(); + + // OpenCV code ///////////////////////////////////////////////////////////// + { + out_mat_ocv = (in_mat1 - mean) / std; + } + // Comparison ////////////////////////////////////////////////////////////// + { + EXPECT_LE(cv::norm(out_mat_ocv, out_mat_gapi, cv::NORM_INF), tolerance); + } } + diff --git a/inference-engine/tests_deprecated/fluid_preproc/common/fluid_tests.hpp b/inference-engine/tests_deprecated/fluid_preproc/common/fluid_tests.hpp index 53073f409d7..9230893f850 100644 --- a/inference-engine/tests_deprecated/fluid_preproc/common/fluid_tests.hpp +++ b/inference-engine/tests_deprecated/fluid_preproc/common/fluid_tests.hpp @@ -25,6 +25,18 @@ struct ConvertDepthTestGAPI: public TestParams> // tolerance {}; +struct DivCTestGAPI: public TestParams> // tolerance +{}; + +struct SubCTestGAPI : public DivCTestGAPI +{}; + +struct MeanValueGAPI: public TestParams> {}; //------------------------------------------------------------------------------ struct ResizeTestIE: public testing::TestWithParam, double>> {}; diff --git a/inference-engine/tests_deprecated/fluid_preproc/cpu/fluid_tests_cpu.cpp b/inference-engine/tests_deprecated/fluid_preproc/cpu/fluid_tests_cpu.cpp index d71cb8b3c1e..e20fa1f066d 100644 --- a/inference-engine/tests_deprecated/fluid_preproc/cpu/fluid_tests_cpu.cpp +++ b/inference-engine/tests_deprecated/fluid_preproc/cpu/fluid_tests_cpu.cpp @@ -183,6 +183,20 @@ INSTANTIATE_TEST_CASE_P(ConvertDepthFluid, ConvertDepthTestGAPI, cv::Size( 320, 200)), Values(1))); +INSTANTIATE_TEST_CASE_P(DivCFluid, DivCTestGAPI, + Combine(Values(CV_32F), + Values(1), //channels + Values(TEST_SIZES), + Values(cv::Scalar{0.229}), + Values(0))); + +INSTANTIATE_TEST_CASE_P(SubCFluid, SubCTestGAPI, + Combine(Values(CV_32F), + Values(1), //channels + Values(TEST_SIZES), + Values(cv::Scalar{0.229}), + Values(0.00001))); + INSTANTIATE_TEST_CASE_P(ResizeRoiTestFluid, ResizeRoiTestGAPI, Combine(Values(CV_8UC1, CV_8UC3), Values(cv::INTER_LINEAR), @@ -284,6 +298,10 @@ INSTANTIATE_TEST_CASE_P(Reorder_CHW2HWC, ColorConvertTestIE, Values(TEST_SIZES), Values(0))); +INSTANTIATE_TEST_CASE_P(MeanValueGAPI32F, MeanValueGAPI, + Combine(Values(TEST_SIZES), + Values(0.00001))); + //------------------------------------------------------------------------------ namespace IE = InferenceEngine; diff --git a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/fluid_test_computations.cpp b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/fluid_test_computations.cpp index ce2206b659a..6ffd28e0bcd 100644 --- a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/fluid_test_computations.cpp +++ b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/fluid_test_computations.cpp @@ -12,8 +12,42 @@ struct FluidComputation::Priv { cv::GComputation m_c; - std::vector m_v_in; + cv::GRunArgs m_v_in; std::vector m_v_out; + + Priv(cv::GComputation && c, std::vector&& v_in, std::vector&& v_out) + : m_c(std::move(c)), + m_v_in(v_in.begin(), v_in.end()), + m_v_out(std::move(v_out)) + {} + + Priv(cv::GComputation && c, cv::gapi::own::Mat&& v_in, cv::gapi::own::Mat&& v_out) + : m_c(std::move(c)), + m_v_in{std::move(v_in)}, + m_v_out{std::move(v_out)} + {} + + Priv(cv::GComputation && c, cv::gapi::own::Mat&& v_in, std::vector&& v_out) + : m_c(std::move(c)), + m_v_in{std::move(v_in)}, + m_v_out(std::move(v_out)) + {} + + Priv(cv::GComputation 
&& c, cv::GRunArgs&& v_in, std::vector&& v_out) + : m_c(std::move(c)), + m_v_in(std::move(v_in)), + m_v_out(std::move(v_out)) + {} + + cv::GRunArgs ins() { return m_v_in;} + cv::GRunArgsP outs() { + cv::GRunArgsP call_outs; + + for (auto &m : m_v_out) { call_outs.emplace_back(&m); } + + return call_outs; + } + }; FluidComputation::FluidComputation(Priv *priv) @@ -27,19 +61,24 @@ namespace void FluidComputation::warmUp(test::Rect roi) { - if (roi.empty()) - m_priv->m_c.apply(m_priv->m_v_in, m_priv->m_v_out, cv::compile_args(InferenceEngine::gapi::preprocKernels())); - else - m_priv->m_c.apply(m_priv->m_v_in, m_priv->m_v_out, cv::compile_args(InferenceEngine::gapi::preprocKernels(), cv::GFluidOutputRois{{to_own(roi)}})); + auto compile_args = roi.empty() ? cv::compile_args(InferenceEngine::gapi::preprocKernels()) + : cv::compile_args(InferenceEngine::gapi::preprocKernels(), + cv::GFluidOutputRois{{to_own(roi)}}); + + m_priv->m_c.apply(m_priv->ins(), m_priv->outs(), std::move(compile_args)); } void FluidComputation::apply() { - m_priv->m_c.apply(m_priv->m_v_in, m_priv->m_v_out); + m_priv->m_c.apply(m_priv->ins(), m_priv->outs()); } namespace { +cv::gapi::own::Scalar to_own(test::Scalar const& s) { + return {s.v[0], s.v[1], s.v[2], s.v[3]}; +} + cv::gapi::own::Mat to_own(test::Mat mat) { return {mat.rows, mat.cols, mat.type, mat.data, mat.step}; } @@ -94,8 +133,8 @@ static cv::GComputation buildResizeComputation(test::Mat inMat, test::Mat outMat FluidResizeComputation::FluidResizeComputation(test::Mat inMat, test::Mat outMat, int interp) : FluidComputation(new Priv{buildResizeComputation(inMat, outMat, interp) - ,{to_own(inMat)} - ,{to_own(outMat)} + ,to_own(inMat) + ,to_own(outMat) }) {} @@ -122,8 +161,8 @@ static cv::GComputation buildResizeRGB8UComputation(test::Mat inMat, test::Mat o FluidResizeRGB8UComputation::FluidResizeRGB8UComputation(test::Mat inMat, test::Mat outMat, int interp) : FluidComputation(new Priv{buildResizeRGB8UComputation(inMat, outMat, interp) - ,{to_own(inMat)} - ,{to_own(outMat)} + ,to_own(inMat) + ,to_own(outMat) }) {} @@ -144,7 +183,7 @@ static cv::GComputation buildSplitComputation(int planes) FluidSplitComputation::FluidSplitComputation(test::Mat inMat, std::vector outMats) : FluidComputation(new Priv{buildSplitComputation(outMats.size()) - ,{to_own(inMat)} + ,to_own(inMat) ,to_own(outMats) }) {} @@ -158,8 +197,8 @@ static cv::GComputation buildChanToPlaneComputation(int chan) FluidChanToPlaneComputation::FluidChanToPlaneComputation(test::Mat inMat, test::Mat outMat, int chan) : FluidComputation(new Priv{buildChanToPlaneComputation(chan) - ,{to_own(inMat)} - ,{to_own(outMat)} + ,to_own(inMat) + ,to_own(outMat) }) {} @@ -216,12 +255,60 @@ FluidI420toRGBComputation::FluidI420toRGBComputation(test::Mat inMat_y, test::Ma ConvertDepthComputation::ConvertDepthComputation(test::Mat inMat, test::Mat outMat, int depth) : FluidComputation(new Priv{ [depth]()-> cv::GComputation { - cv::GMat in; - cv::GMat out = InferenceEngine::gapi::ConvertDepth::on(in, depth); - return cv::GComputation(cv::GIn(in), cv::GOut(out)); - }() - , {to_own(inMat)} - , {to_own(outMat)} + cv::GMat in; + cv::GMat out = InferenceEngine::gapi::ConvertDepth::on(in, depth); + return cv::GComputation(cv::GIn(in), cv::GOut(out)); + }() + , to_own(inMat) + , to_own(outMat) }) {} +DivCComputation::DivCComputation(test::Mat inMat, test::Mat outMat, test::Scalar const& c) + : FluidComputation(new Priv{ []()-> cv::GComputation { + cv::GMat in; + cv::GScalar C; + cv::GMat out = in / C; + return 
cv::GComputation(cv::GIn(in, C), cv::GOut(out)); + }() + , cv::GRunArgs{cv::GRunArg{to_own(inMat)}, cv::GRunArg{to_own(c)}} + , {to_own(outMat)} + }) +{} + +SubCComputation::SubCComputation(test::Mat inMat, test::Mat outMat, test::Scalar const& c) + : FluidComputation(new Priv{ []()-> cv::GComputation{ + cv::GMat in; + cv::GScalar C; + cv::GMat out = in - C; + return cv::GComputation(cv::GIn(in, C), cv::GOut(out)); + }() + , cv::GRunArgs{cv::GRunArg{to_own(inMat)}, cv::GRunArg{to_own(c)}} + , {to_own(outMat)} + }) +{} + +MeanValueSubtractComputation::MeanValueSubtractComputation(test::Mat inMat, test::Mat outMat, test::Scalar const& mean, test::Scalar const& std) + : FluidComputation(new Priv{ []()-> cv::GComputation{ + cv::GMat in; + cv::GScalar _mean; + cv::GScalar _std; + cv::GMat out = (in - _mean) / _std; + return cv::GComputation(cv::GIn(in, _mean, _std), cv::GOut(out)); + }() + , cv::GRunArgs{cv::GRunArg{to_own(inMat)}, cv::GRunArg{to_own(mean)}, cv::GRunArg{to_own(std)}} + , {to_own(outMat)} + }) +{} + +namespace cv { +cv::GMat operator-(const cv::GMat& lhs, const cv::GScalar& rhs) +{ + return InferenceEngine::gapi::GSubC::on(lhs, rhs, -1); +} +cv::GMat operator/(const cv::GMat& lhs, const cv::GScalar& rhs) +{ + return InferenceEngine::gapi::GDivC::on(lhs, rhs, 1.0, -1); +} + +} diff --git a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/fluid_test_computations.hpp b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/fluid_test_computations.hpp index 57f554f8e9e..895889b3a88 100644 --- a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/fluid_test_computations.hpp +++ b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/fluid_test_computations.hpp @@ -9,6 +9,7 @@ #include #include +#include #if defined(_WIN32) #ifdef IMPLEMENT_FLUID_COMPUTATION_API @@ -43,6 +44,11 @@ struct Rect{ return width == 0 && height == 0; }; }; +struct Scalar +{ + std::array v; +}; + } class FLUID_COMPUTATION_VISIBILITY FluidComputation @@ -104,4 +110,22 @@ public: ConvertDepthComputation(test::Mat inMat, test::Mat outMat, int depth); }; +class FLUID_COMPUTATION_VISIBILITY DivCComputation : public FluidComputation +{ +public: + DivCComputation(test::Mat inMat, test::Mat outMat, test::Scalar const& c); +}; + +class FLUID_COMPUTATION_VISIBILITY SubCComputation : public FluidComputation +{ +public: + SubCComputation(test::Mat inMat, test::Mat outMat, test::Scalar const& c); +}; + +class FLUID_COMPUTATION_VISIBILITY MeanValueSubtractComputation : public FluidComputation +{ +public: + MeanValueSubtractComputation(test::Mat inMat, test::Mat outMat, test::Scalar const& mean, test::Scalar const& std); +}; + #endif // FLUID_TEST_COMPUTATIONS_HPP From 07214d0a4752a579cf4b27d9be98fb8f81b45cfb Mon Sep 17 00:00:00 2001 From: Anastasia Popova Date: Thu, 29 Apr 2021 14:05:35 +0300 Subject: [PATCH 10/73] Changed "out_port_id" attribute setting in mapping file to store tensor names. (#5344) * Removed port id from fw_tensor_debug_info attribute. * Added port number to tensor names in kaldi, mxnet. Fixed Const naming. * Sort imports. 
--- .../back/SpecialNodesFinalization.py | 6 +- .../extensions/front/onnx/loop_ext.py | 4 +- model-optimizer/mo/front/caffe/loader.py | 4 +- model-optimizer/mo/front/extractor.py | 3 +- .../mo/front/kaldi/loader/utils.py | 2 +- .../mo/front/mxnet/extractors/utils.py | 4 +- model-optimizer/mo/front/onnx/loader.py | 4 +- model-optimizer/mo/front/tf/extractor.py | 2 +- model-optimizer/mo/graph/port.py | 4 +- .../mo/utils/ir_reader/layer_to_class.py | 4 +- .../extensions/back/ResultRename_test.py | 4 +- .../extensions/front/output_cut_test.py | 12 ++-- .../unit_tests/mo/graph/connection_test.py | 60 +++++++++---------- .../unit_tests/mo/graph/port_test.py | 17 +++--- .../mo/utils/ir_reader/layer_to_class_test.py | 6 +- 15 files changed, 69 insertions(+), 67 deletions(-) diff --git a/model-optimizer/extensions/back/SpecialNodesFinalization.py b/model-optimizer/extensions/back/SpecialNodesFinalization.py index 915c5670d74..01e95cbede9 100644 --- a/model-optimizer/extensions/back/SpecialNodesFinalization.py +++ b/model-optimizer/extensions/back/SpecialNodesFinalization.py @@ -2,12 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 import logging as log +import re from collections import defaultdict import numpy as np from extensions.back.pass_separator import BackFinish -from extensions.ops.tensor_iterator import TensorIterator, get_internal_node_by_layer_id +from extensions.ops.tensor_iterator import TensorIterator from mo.back.replacement import BackReplacementPattern from mo.graph.graph import Graph from mo.ops.const import Const @@ -77,7 +78,8 @@ class CreateConstNodesReplacement(BackReplacementPattern): if self._check_bin_attrs(node): if node.has_valid('value'): - const_node_name = graph.unique_id(node.id + '_const') + const_node_name = node.soft_get('name', node.id) + const_node_name = graph.unique_id(re.sub(r'\/Output_\d+\/Data_(.?)+', '', const_node_name)) log.debug("Added Const node '{}'".format(const_node_name)) const_node = Const(graph, {'name': const_node_name, 'value': node.value, 'force_shape': node.soft_get('force_shape', None), diff --git a/model-optimizer/extensions/front/onnx/loop_ext.py b/model-optimizer/extensions/front/onnx/loop_ext.py index 8c692001ed3..372562d32c7 100644 --- a/model-optimizer/extensions/front/onnx/loop_ext.py +++ b/model-optimizer/extensions/front/onnx/loop_ext.py @@ -81,7 +81,7 @@ class LoopExtractor(FrontExtractorOp): 'out': src_port, 'in': dst_port, 'name': inp, - 'fw_tensor_debug_info': [(src_id, dst_port, inp)], + 'fw_tensor_debug_info': [(src_id, inp)], 'in_attrs': ['in', 'name'], 'out_attrs': ['out', 'name'], 'data_attrs': ['fw_tensor_debug_info'] @@ -136,7 +136,7 @@ class LoopExtractor(FrontExtractorOp): main_graph.add_edge(src_node, loop_node.id, **{'out': src_port, 'in': next_loop_input_port_idx, 'name': src_node, - 'fw_tensor_debug_info': [(src_node, next_loop_input_port_idx, tensor_name)], + 'fw_tensor_debug_info': [(src_node, tensor_name)], 'in_attrs': ['in', 'name'], 'out_attrs': ['out', 'name'], 'data_attrs': ['fw_tensor_debug_info']} diff --git a/model-optimizer/mo/front/caffe/loader.py b/model-optimizer/mo/front/caffe/loader.py index c2f670505cb..d513694a8e0 100644 --- a/model-optimizer/mo/front/caffe/loader.py +++ b/model-optimizer/mo/front/caffe/loader.py @@ -319,8 +319,8 @@ def add_edge_caffe(graph: Graph, bottom: str, dst_layer: str, blob_producers: di 'out': src_port, 'in': dst_port, 'name': bottom, - # debug anchor for a framework name, out port and tensor name - 'fw_tensor_debug_info': [(blob_producers[bottom][2], src_port, bottom)], + # 
debug anchor for a framework name and tensor name + 'fw_tensor_debug_info': [(blob_producers[bottom][2], bottom)], 'in_attrs': ['in', 'name'], 'out_attrs': ['out', 'name'], 'data_attrs': ['fw_tensor_debug_info'] diff --git a/model-optimizer/mo/front/extractor.py b/model-optimizer/mo/front/extractor.py index 28d5b0973e4..acb5003d266 100644 --- a/model-optimizer/mo/front/extractor.py +++ b/model-optimizer/mo/front/extractor.py @@ -860,8 +860,9 @@ def add_input_op(graph: Graph, node_id: str, port: int = 0, data: bool = False, input_op = Parameter(graph, dict(shape=shape, data_type=data_type, initial_node_name=node_id, name=get_new_placeholder_name(node_id, is_out_port, port))) + fw_name = Node(graph, node_id).soft_get('name') edge_attrs = {'in': port, 'out': 0, 'in_attrs': ['in'], 'out_attrs': ['out'], - 'fw_tensor_debug_info': [(Node(graph, node_id).soft_get('name'), port)], + 'fw_tensor_debug_info': [(fw_name, fw_name)], 'data_attrs': ['fw_tensor_debug_info']} if not data: if is_out_port: diff --git a/model-optimizer/mo/front/kaldi/loader/utils.py b/model-optimizer/mo/front/kaldi/loader/utils.py index 5c9c0087a88..570d4726b6e 100644 --- a/model-optimizer/mo/front/kaldi/loader/utils.py +++ b/model-optimizer/mo/front/kaldi/loader/utils.py @@ -332,7 +332,7 @@ def create_edge_attrs(prev_layer_id: str, next_layer_id: str, tensor_name: str, 'out': out_port, 'in': in_port, 'name': next_layer_id, - 'fw_tensor_debug_info': [(prev_layer_id, out_port, tensor_name)], + 'fw_tensor_debug_info': [(prev_layer_id, tensor_name + ":" + str(out_port))], 'in_attrs': ['in', 'permutation'], 'out_attrs': ['out', 'permutation'], 'data_attrs': ['fw_tensor_debug_info'] diff --git a/model-optimizer/mo/front/mxnet/extractors/utils.py b/model-optimizer/mo/front/mxnet/extractors/utils.py index 0727ae82c26..fe82b3421cf 100644 --- a/model-optimizer/mo/front/mxnet/extractors/utils.py +++ b/model-optimizer/mo/front/mxnet/extractors/utils.py @@ -114,8 +114,8 @@ def create_mxnet_edge(src_node_id: str, dst_node_id: str, src_port: int, dst_por edge_attrs = { 'in': src_port, 'out': dst_port, - # debug anchor for framework name, out port and tensor name - 'fw_tensor_debug_info': [(framework_name, dst_port, framework_name)], + # debug anchor for framework name and tensor name + 'fw_tensor_debug_info': [(framework_name, framework_name + ":" + str(dst_port))], 'in_attrs': ['in'], 'out_attrs': ['out'], 'data_attrs': ['fw_tensor_debug_info'] diff --git a/model-optimizer/mo/front/onnx/loader.py b/model-optimizer/mo/front/onnx/loader.py index 93fdee5e0e3..8429ea142a1 100644 --- a/model-optimizer/mo/front/onnx/loader.py +++ b/model-optimizer/mo/front/onnx/loader.py @@ -96,7 +96,7 @@ def protobuf2nx(graph: Graph, pb): 'out': src_port, 'in': dst_port, 'name': inp, - 'fw_tensor_debug_info': [(src_id, src_port, inp)], + 'fw_tensor_debug_info': [(src_id, inp)], 'in_attrs': ['in', 'name'], 'out_attrs': ['out', 'name'], 'data_attrs': ['fw_tensor_debug_info'] @@ -110,7 +110,7 @@ def protobuf2nx(graph: Graph, pb): 'out': src_port, 'in': 0, 'name': out, - 'fw_tensor_debug_info': [(fw_name, src_port, out)], + 'fw_tensor_debug_info': [(fw_name, out)], 'in_attrs': ['in', 'name'], 'out_attrs': ['out', 'name'], 'data_attrs': ['fw_tensor_debug_info'] diff --git a/model-optimizer/mo/front/tf/extractor.py b/model-optimizer/mo/front/tf/extractor.py index 402dcc87025..52a1015f526 100644 --- a/model-optimizer/mo/front/tf/extractor.py +++ b/model-optimizer/mo/front/tf/extractor.py @@ -38,7 +38,7 @@ def create_tf_edge(src_node_id: str, dst_node_id: str, 
in_port: int): 'in': in_port, 'out': src_port, # debug anchor for a framework name, out port and tensor name - 'fw_tensor_debug_info': [(src_node_id, src_port, tensor_name)], + 'fw_tensor_debug_info': [(src_node_id, tensor_name)], 'in_attrs': ['in', 'control_flow_edge', 'permutation'], 'out_attrs': ['out', 'permutation'], 'data_attrs': ['fw_tensor_debug_info'], diff --git a/model-optimizer/mo/graph/port.py b/model-optimizer/mo/graph/port.py index 51f5cbe0bda..c4859087f45 100644 --- a/model-optimizer/mo/graph/port.py +++ b/model-optimizer/mo/graph/port.py @@ -269,8 +269,8 @@ class Port: if attrs['fw_tensor_debug_info'] is None: return tensor_names_list for attr in attrs['fw_tensor_debug_info']: - if attr is not None and len(attr) >= 3: - tensor_name = attr[2] + if attr is not None and len(attr) >= 2: + tensor_name = attr[1] if tensor_name is not None and len(tensor_name) > 0: tensor_names_list.append(tensor_name.replace(',', '\\,')) return tensor_names_list diff --git a/model-optimizer/mo/utils/ir_reader/layer_to_class.py b/model-optimizer/mo/utils/ir_reader/layer_to_class.py index 67bc917f747..a0487d44cfb 100644 --- a/model-optimizer/mo/utils/ir_reader/layer_to_class.py +++ b/model-optimizer/mo/utils/ir_reader/layer_to_class.py @@ -302,9 +302,9 @@ def restore_tensor_names(op: Node): op.out_node(out_port)['fw_tensor_debug_info'] = [] for out_tensor_name in out_tensor_names: out_tensor_name = out_tensor_name.replace(str_to_replace, ',') - op.out_node(out_port)['fw_tensor_debug_info'].append((out_tensor_name, out_port, out_tensor_name)) + op.out_node(out_port)['fw_tensor_debug_info'].append((out_tensor_name, out_tensor_name)) else: - op.out_node(out_port)['fw_tensor_debug_info'] = [(out_tensor_names, out_port, out_tensor_names)] + op.out_node(out_port)['fw_tensor_debug_info'] = [(out_tensor_names, out_tensor_names)] def copy_graph_with_ops(graph: Graph) -> Graph: diff --git a/model-optimizer/unit_tests/extensions/back/ResultRename_test.py b/model-optimizer/unit_tests/extensions/back/ResultRename_test.py index b38937fa624..52918333ce2 100644 --- a/model-optimizer/unit_tests/extensions/back/ResultRename_test.py +++ b/model-optimizer/unit_tests/extensions/back/ResultRename_test.py @@ -13,8 +13,8 @@ nodes = { **regular_op('Op2', {'type': 'Op2', 'kind': 'op', 'op': 'Op2'}), **result('result1'), **result('result2'), - 'Op1_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op1', 0, 'Op1_tensor')]}, - 'Op2_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op1', 0, 'Op2_tensor')]}, + 'Op1_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op1', 'Op1_tensor')]}, + 'Op2_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op1', 'Op2_tensor')]}, } diff --git a/model-optimizer/unit_tests/extensions/front/output_cut_test.py b/model-optimizer/unit_tests/extensions/front/output_cut_test.py index ff660917206..4d4fa68389c 100644 --- a/model-optimizer/unit_tests/extensions/front/output_cut_test.py +++ b/model-optimizer/unit_tests/extensions/front/output_cut_test.py @@ -22,7 +22,7 @@ class TestsOutputCut(unittest.TestCase): def test_case1(self): graph = build_graph(nodes, [('Parameter1', 'FakeOutput1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': - [('Parameter1', 0, 'Parameter1_tensor_name')]})]) + [('Parameter1', 'Parameter1_tensor_name')]})]) graph.graph['packed_outputs'] = None graph.graph['user_shapes'] = None @@ -31,18 +31,18 @@ class TestsOutputCut(unittest.TestCase): param1 = Node(graph, 'Parameter1') self.assertTrue(param1.out_node()['type'] == 'Result') - 
self.assertTrue(param1.out_edge()['fw_tensor_debug_info'] == [('Parameter1', 0, 'Parameter1_tensor_name')]) + self.assertTrue(param1.out_edge()['fw_tensor_debug_info'] == [('Parameter1', 'Parameter1_tensor_name')]) self.assertTrue(graph.get_op_nodes(name='FakeOutput1') == []) def test_case2(self): graph = build_graph(nodes, [('Parameter1', 'Op1'), ('Op1', 'FakeOutput1', {'in': 1, 'out': 1, 'fw_tensor_debug_info': - [('Op1', 0, 'Op1_tensor_name')]}), + [('Op1', 'Op1_tensor_name')]}), ('Parameter1', 'Op2'), ('Op2', 'FakeOutput2', {'in': 2, 'out': 3, - 'fw_tensor_debug_info': [('Op2', 0, 'Op2_tensor_name')]})]) + 'fw_tensor_debug_info': [('Op2', 'Op2_tensor_name')]})]) graph.graph['packed_outputs'] = None graph.graph['user_shapes'] = None @@ -53,8 +53,8 @@ class TestsOutputCut(unittest.TestCase): op2 = Node(graph, 'Op2') self.assertTrue(op1.out_node(1)['type'] == 'Result') self.assertTrue(op2.out_node(3)['type'] == 'Result') - self.assertTrue(op1.out_edge(1)['fw_tensor_debug_info'] == [('Op1', 0, 'Op1_tensor_name')]) - self.assertTrue(op2.out_edge(3)['fw_tensor_debug_info'] == [('Op2', 0, 'Op2_tensor_name')]) + self.assertTrue(op1.out_edge(1)['fw_tensor_debug_info'] == [('Op1', 'Op1_tensor_name')]) + self.assertTrue(op2.out_edge(3)['fw_tensor_debug_info'] == [('Op2', 'Op2_tensor_name')]) self.assertTrue(graph.get_op_nodes(name='FakeOutput1') == []) self.assertTrue(graph.get_op_nodes(name='FakeOutput2') == []) diff --git a/model-optimizer/unit_tests/mo/graph/connection_test.py b/model-optimizer/unit_tests/mo/graph/connection_test.py index ca22e3def70..a755b1f901e 100644 --- a/model-optimizer/unit_tests/mo/graph/connection_test.py +++ b/model-optimizer/unit_tests/mo/graph/connection_test.py @@ -13,9 +13,9 @@ nodes = { **regular_op('Op2', {'type': 'Op2', 'kind': 'op', 'op': 'Op2'}), **regular_op('NewOp', {'type': 'NewOp', 'kind': 'op', 'op': 'NewOp'}), - 'input_data': {'kind': 'data', 'fw_tensor_debug_info': [('input', 0, 'input')]}, - 'Op1_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op1', 0, 'Op1')]}, - 'Op2_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op2', 0, 'Op2')]}, + 'input_data': {'kind': 'data', 'fw_tensor_debug_info': [('input', 'input')]}, + 'Op1_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op1', 'Op1')]}, + 'Op2_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op2', 'Op2')]}, 'NewOp_data': {'kind': 'data'}, } @@ -33,9 +33,9 @@ class TestsFront(unittest.TestCase): def test_case1_merge(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [ - ('input', 'NewOp', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('input', 'NewOp', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) input_node = Node(graph, 'input') new_node = Node(graph, 'NewOp') @@ -50,9 +50,9 @@ class TestsFront(unittest.TestCase): def test_case1_source(self): graph = build_graph(nodes, [ - ('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [ - ('input', 'NewOp', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('input', 'NewOp', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) input_node = Node(graph, 'input') new_node = Node(graph, 'NewOp') @@ -67,7 +67,7 @@ class 
TestsFront(unittest.TestCase): def test_case1_dest(self): graph = build_graph(nodes, [ - ('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [ ('input', 'NewOp', {'in': 0, 'out': 0})]) @@ -84,9 +84,9 @@ class TestsFront(unittest.TestCase): def test_case2_merge(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [ - ('input', 'NewOp', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('input', 'NewOp', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) op1_node = Node(graph, 'Op1') new_node = Node(graph, 'NewOp') @@ -101,9 +101,9 @@ class TestsFront(unittest.TestCase): def test_case2_source(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [ - ('input', 'NewOp', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('input', 'NewOp', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) op1_node = Node(graph, 'Op1') new_node = Node(graph, 'NewOp') @@ -118,7 +118,7 @@ class TestsFront(unittest.TestCase): def test_case2_dest(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [('input', 'NewOp', {'in': 0, 'out': 0})]) op1_node = Node(graph, 'Op1') @@ -134,9 +134,9 @@ class TestsFront(unittest.TestCase): def test_case3_merge(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [ - ('NewOp', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('NewOp', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) op1_node = Node(graph, 'Op1') new_node = Node(graph, 'NewOp') @@ -151,7 +151,7 @@ class TestsFront(unittest.TestCase): def test_case3_source(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [('NewOp', 'Op1', {'in': 0, 'out': 0})]) op1_node = Node(graph, 'Op1') @@ -167,9 +167,9 @@ class TestsFront(unittest.TestCase): def test_case3_dest(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [ - ('NewOp', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('NewOp', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) op1_node = Node(graph, 'Op1') new_node = Node(graph, 'NewOp') @@ -184,9 +184,9 @@ class TestsFront(unittest.TestCase): def test_case4_merge(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 
'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) graph_ref = build_graph(nodes, [ - ('NewOp', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input')]})]) + ('NewOp', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input')]})]) op1_node = Node(graph, 'Op1') new_node = Node(graph, 'NewOp') @@ -427,7 +427,7 @@ class TestsMiddle(unittest.TestCase): graph_ref = build_graph(nodes, [('input', 'input_data'), ('NewOp', 'NewOp_data'), ('NewOp_data', 'Op1')]) new_op_data = Node(graph_ref, 'NewOp_data') - new_op_data['fw_tensor_debug_info'] = [('input', 0, 'input')] + new_op_data['fw_tensor_debug_info'] = [('input', 'input')] input_data = Node(graph_ref, 'input_data') del input_data['fw_tensor_debug_info'] @@ -459,7 +459,7 @@ class TestsMiddle(unittest.TestCase): graph_ref = build_graph(nodes, [('input', 'input_data'), ('NewOp', 'NewOp_data'), ('NewOp_data', 'Op1')]) new_op_data = Node(graph_ref, 'NewOp_data') - new_op_data['fw_tensor_debug_info'] = [('input', 0, 'input')] + new_op_data['fw_tensor_debug_info'] = [('input', 'input')] input_data = Node(graph_ref, 'input_data') del input_data['fw_tensor_debug_info'] @@ -478,7 +478,7 @@ class TestsMiddle(unittest.TestCase): graph_ref = build_graph(nodes, [('input', 'input_data'), ('NewOp', 'NewOp_data'), ('NewOp_data', 'Op1')]) new_op_data = Node(graph_ref, 'NewOp_data') - new_op_data['fw_tensor_debug_info'] = [('input', 0, 'input')] + new_op_data['fw_tensor_debug_info'] = [('input', 'input')] op1_node = Node(graph, 'Op1') new_node = Node(graph, 'NewOp') @@ -507,7 +507,7 @@ class TestsMiddle(unittest.TestCase): graph_ref = build_graph(nodes, [('input', 'input_data'), ('NewOp', 'NewOp_data'), ('NewOp_data', 'Op1')]) new_op_data = Node(graph_ref, 'NewOp_data') - new_op_data['fw_tensor_debug_info'] = [('input', 0, 'input')] + new_op_data['fw_tensor_debug_info'] = [('input', 'input')] op1_node = Node(graph, 'Op1') new_node = Node(graph, 'NewOp') @@ -525,7 +525,7 @@ class TestsMiddle(unittest.TestCase): ('Op1', 'Op1_data'), ('input_data', 'Op2')]) input_data = Node(graph_ref, 'input_data') - input_data['fw_tensor_debug_info'] = [('input', 0, 'input'), ('Op1', 0, 'Op1')] + input_data['fw_tensor_debug_info'] = [('input', 'input'), ('Op1', 'Op1')] op1_data = Node(graph_ref, 'Op1_data') del op1_data['fw_tensor_debug_info'] @@ -544,7 +544,7 @@ class TestsMiddle(unittest.TestCase): ('Op1', 'Op1_data'), ('input_data', 'Op2')]) input_data = Node(graph_ref, 'input_data') - input_data['fw_tensor_debug_info'] = [('input', 0, 'input')] + input_data['fw_tensor_debug_info'] = [('input', 'input')] op1_node = Node(graph, 'Op1') op1_node.out_port(0).get_connection().set_source(op1_node.in_port(0).get_source(), "source") @@ -560,7 +560,7 @@ class TestsMiddle(unittest.TestCase): ('Op1', 'Op1_data'), ('input_data', 'Op2')]) input_data = Node(graph_ref, 'input_data') - input_data['fw_tensor_debug_info'] = [('Op1', 0, 'Op1')] + input_data['fw_tensor_debug_info'] = [('Op1', 'Op1')] op1_data = Node(graph_ref, 'Op1_data') del op1_data['fw_tensor_debug_info'] @@ -579,7 +579,7 @@ class TestsMiddle(unittest.TestCase): ('Op1', 'Op1_data')]) input_data = Node(graph_ref, 'input_data') - input_data['fw_tensor_debug_info'] = [('input', 0, 'input'), ('Op1', 0, 'Op1')] + input_data['fw_tensor_debug_info'] = [('input', 'input'), ('Op1', 'Op1')] op1_node = Node(graph, 'Op1') 
op1_node.in_port(0).get_connection().set_destination(op1_node.out_port(0).get_destination(), "merge") @@ -595,7 +595,7 @@ class TestsMiddle(unittest.TestCase): ('Op1', 'Op1_data')]) input_data = Node(graph_ref, 'input_data') - input_data['fw_tensor_debug_info'] = [('input', 0, 'input')] + input_data['fw_tensor_debug_info'] = [('input', 'input')] op1_node = Node(graph, 'Op1') op1_node.in_port(0).get_connection().set_destination(op1_node.out_port(0).get_destination(), "source") @@ -611,7 +611,7 @@ class TestsMiddle(unittest.TestCase): ('Op1', 'Op1_data')]) input_data = Node(graph_ref, 'input_data') - input_data['fw_tensor_debug_info'] = [('Op1', 0, 'Op1')] + input_data['fw_tensor_debug_info'] = [('Op1', 'Op1')] op1_node = Node(graph, 'Op1') op1_node.in_port(0).get_connection().set_destination(op1_node.out_port(0).get_destination(), "dest") diff --git a/model-optimizer/unit_tests/mo/graph/port_test.py b/model-optimizer/unit_tests/mo/graph/port_test.py index 524e38dd33e..1b8f9d83ba4 100644 --- a/model-optimizer/unit_tests/mo/graph/port_test.py +++ b/model-optimizer/unit_tests/mo/graph/port_test.py @@ -12,18 +12,18 @@ nodes = { **regular_op('Op2', {'type': 'Op2', 'kind': 'op', 'op': 'Op2'}), **regular_op('Op3', {'type': 'Op3', 'kind': 'op', 'op': 'Op3'}), - 'input_data': {'kind': 'data', 'fw_tensor_debug_info': [('input', 0, 'input'), ('Op1', 0, 'Op1,Op2')]}, - 'Op1_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op1', 0, 'Op1,Op2')]}, + 'input_data': {'kind': 'data', 'fw_tensor_debug_info': [('input', 'input'), ('Op1', 'Op1,Op2')]}, + 'Op1_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op1', 'Op1,Op2')]}, 'Op2_data': {'kind': 'data'}, - 'Op3_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op3', 0, 'Op3')]}, + 'Op3_data': {'kind': 'data', 'fw_tensor_debug_info': [('Op3', 'Op3')]}, } class TestsGetTensorNames(unittest.TestCase): def test_front(self): graph = build_graph(nodes, - [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input'), - ('Op1', 0, 'Op1,Op2')]})]) + [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input'), + ('Op1', 'Op1,Op2')]})]) graph.stage = 'front' input_node = Node(graph, 'input') self.assertTrue(input_node.out_port(0).get_tensor_names() == ['input', 'Op1\\,Op2']) @@ -72,10 +72,9 @@ class TestsGetTensorNames(unittest.TestCase): self.assertTrue(op3_node.out_port(0).get_tensor_names() == ['Op3', 'input', 'Op1\\,Op2']) def test_reconnect_front_case1(self): - graph = build_graph(nodes, [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 0, 'input'), - ('Op1', 0, - 'Op1,Op2')]}), - ('Op3', 'Op2', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('Op3', 0, 'Op3')]})]) + graph = build_graph(nodes, [('input', 'Op1', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('input', 'input'), + ('Op1', 'Op1,Op2')]}), + ('Op3', 'Op2', {'in': 0, 'out': 0, 'fw_tensor_debug_info': [('Op3', 'Op3')]})]) graph.stage = 'front' input_node = Node(graph, 'input') diff --git a/model-optimizer/unit_tests/mo/utils/ir_reader/layer_to_class_test.py b/model-optimizer/unit_tests/mo/utils/ir_reader/layer_to_class_test.py index b2f602c5de4..2dab5c3b35e 100644 --- a/model-optimizer/unit_tests/mo/utils/ir_reader/layer_to_class_test.py +++ b/model-optimizer/unit_tests/mo/utils/ir_reader/layer_to_class_test.py @@ -126,7 +126,7 @@ class TestFunction(unittest.TestCase): node_2 = Node(graph, 'add_data') node_3 = Node(graph, 'add_const_data') - assert node_1['fw_tensor_debug_info'] == [('abc', 0, 'abc'), ('def', 0, 'def')], 'Restored debug 
info is wrong!' - assert node_2['fw_tensor_debug_info'] == [('ghi,jkl', 0, 'ghi,jkl')], 'Restored debug info is wrong!' - assert node_3['fw_tensor_debug_info'] == [('mno', 0, 'mno'), ('pqr,stu', 0, 'pqr,stu')],\ + assert node_1['fw_tensor_debug_info'] == [('abc', 'abc'), ('def', 'def')], 'Restored debug info is wrong!' + assert node_2['fw_tensor_debug_info'] == [('ghi,jkl', 'ghi,jkl')], 'Restored debug info is wrong!' + assert node_3['fw_tensor_debug_info'] == [('mno', 'mno'), ('pqr,stu', 'pqr,stu')],\ 'Restored debug info is wrong!' From 6624a77827557f364dd196b9ca7a18188c1f16c7 Mon Sep 17 00:00:00 2001 From: Mikhail Nosov Date: Thu, 29 Apr 2021 14:10:21 +0300 Subject: [PATCH 11/73] Disabled sporadically failed GNAAOTTests (#5455) --- .../unit/engines/gna/gna_graph_aot_test.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp index a9c8844f11c..b23e6e68a77 100644 --- a/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_graph_aot_test.cpp @@ -76,7 +76,8 @@ TEST_F(GNAAOTTests, DISABLED_AffineWith2AffineOutputs_canbe_imported_verify_stru } -TEST_F(GNAAOTTests, TwoInputsModel_canbe_export_imported) { +TEST_F(GNAAOTTests, DISABLED_TwoInputsModel_canbe_export_imported) { + // Disabled because of random conflicts with other tests: Issue-54220 #if GNA_LIB_VER == 1 GTEST_SKIP(); #endif @@ -95,8 +96,8 @@ TEST_F(GNAAOTTests, TwoInputsModel_canbe_export_imported) { .inNotCompactMode().gna().propagate_forward().called().once(); } -TEST_F(GNAAOTTests, PermuteModel_canbe_export_imported) { - +TEST_F(GNAAOTTests, DISABLED_PermuteModel_canbe_export_imported) { + // Disabled because of random conflicts with other tests: Issue-54220 #if GNA_LIB_VER == 1 GTEST_SKIP(); #endif @@ -112,8 +113,8 @@ TEST_F(GNAAOTTests, PermuteModel_canbe_export_imported) { .inNotCompactMode().gna().propagate_forward().called().once(); } -TEST_F(GNAAOTTests, PoolingModel_canbe_export_imported) { - +TEST_F(GNAAOTTests, DISABLED_PoolingModel_canbe_export_imported) { + // Disabled because of random conflicts with other tests: Issue-54220 #if GNA_LIB_VER == 1 GTEST_SKIP(); #endif From 7b52e3155af64f46e1a25f6d287e0c4cfb03374a Mon Sep 17 00:00:00 2001 From: Svetlana Dolinina Date: Thu, 29 Apr 2021 14:38:08 +0300 Subject: [PATCH 12/73] Tdnnf (#5255) * initial changes (IR not generated) * extractor fix * convert tdnnf (with correct infer) * refactoring + comments in code * added unit tests + couple fixes based on tests * change order for old convolutions * fix pylint * small refactoring * added if to remove changes in old irs * doc updated * fix layout and kernel shapes for old convolutions * fixed test * moved test * fix import in test * fixed backward compatibility * review fixes --- .../Supported_Frameworks_Layers.md | 1 + model-optimizer/automation/package_BOM.txt | 3 + .../kaldi/add_reshape_around_convolution.py | 53 +-- .../kaldi/replace_timeheightconvolution.py | 103 ++++++ .../middle/ReplaceMemoryOffsetWithSplice.py | 4 + .../extractors/batchnorm_component_ext.py | 17 +- .../extractors/timeheightconvolution_ext.py | 62 ++++ .../mo/front/kaldi/loader/utils.py | 1 + model-optimizer/mo/front/kaldi/utils.py | 7 + .../mo/ops/timeheightconvolution.py | 19 + .../replace_timeheightconvolution_test.py | 324 ++++++++++++++++++ .../convolutional_component_ext_test.py | 4 +- 12 files 
changed, 565 insertions(+), 33 deletions(-) create mode 100644 model-optimizer/extensions/front/kaldi/replace_timeheightconvolution.py create mode 100644 model-optimizer/mo/front/kaldi/extractors/timeheightconvolution_ext.py create mode 100644 model-optimizer/mo/ops/timeheightconvolution.py create mode 100644 model-optimizer/unit_tests/extensions/front/kaldi/replace_timeheightconvolution_test.py diff --git a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md index bb382a57f88..a711292e462 100644 --- a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md +++ b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md @@ -383,6 +383,7 @@ Standard Kaldi\* Layers: | splicecomponent | No | | tanhcomponent | No | | tdnncomponent | No | +| timeheightconvolutioncomponent | No | ## ONNX\* Supported Operators diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index ae873347980..1225f24be27 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -160,6 +160,7 @@ extensions/front/kaldi/memoryoffset_batch_update.py extensions/front/kaldi/replace_eltwise_nin1.py extensions/front/kaldi/replace_lstm_node_pattern.py extensions/front/kaldi/replace_lstm_nonlinearity.py +extensions/front/kaldi/replace_timeheightconvolution.py extensions/front/kaldi/set_ports.py extensions/front/kaldi/sigmoid_ext.py extensions/front/kaldi/split_recurrent_memoryoffset.py @@ -865,6 +866,7 @@ mo/front/kaldi/extractors/softmax_ext.py mo/front/kaldi/extractors/specaugment_component_ext.py mo/front/kaldi/extractors/splice_component_ext.py mo/front/kaldi/extractors/tdnncomponent_ext.py +mo/front/kaldi/extractors/timeheightconvolution_ext.py mo/front/kaldi/loader/__init__.py mo/front/kaldi/loader/loader.py mo/front/kaldi/loader/utils.py @@ -977,6 +979,7 @@ mo/ops/squeeze.py mo/ops/strided_slice.py mo/ops/tdnncomponent.py mo/ops/tile.py +mo/ops/timeheightconvolution.py mo/ops/unsqueeze.py mo/pipeline/__init__.py mo/pipeline/common.py diff --git a/model-optimizer/extensions/front/kaldi/add_reshape_around_convolution.py b/model-optimizer/extensions/front/kaldi/add_reshape_around_convolution.py index 42ef5d4da06..2769f765911 100644 --- a/model-optimizer/extensions/front/kaldi/add_reshape_around_convolution.py +++ b/model-optimizer/extensions/front/kaldi/add_reshape_around_convolution.py @@ -1,11 +1,9 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import numpy as np - -from extensions.ops.Cast import Cast from extensions.ops.elementwise import Div -from mo.front.common.partial_infer.utils import int64_array, float32_array +from extensions.ops.transpose import Transpose +from mo.front.common.partial_infer.utils import int64_array from mo.front.common.replacement import FrontReplacementPattern from mo.front.tf.graph_utils import create_op_with_const_inputs, create_op_node_with_second_input from mo.graph.graph import Graph @@ -45,32 +43,45 @@ class ReplaceConvolutionReshape(FrontReplacementPattern): node = match['conv'] node_name = node.soft_get('name', node.id) - dst_dtype = np.float32 # even if data_type=FP16 use float32 for shape values - # create Reshape before convolution - # shape = [in_shape[0], in_shape[1]/patch_stride, 1, patch_stride] - i_shape = Shape(graph, {'name': node_name + '/Shape'}).create_node() - shape = Cast(graph, {'name': node_name + '/to_float', - 'dst_type': dst_dtype}).create_node() - 
i_shape.in_port(0).connect(node.in_port(0).get_source()) - shape.in_port(0).connect(i_shape.out_port(0)) + # if transpose will be applied (new models) + # shape = [in_shape[0], t= in_shape[1]/(patch_stride*t), patch_stride, C=1] + # else (for old models to avoid fails on GNA - should be removed as soon as GNA will be changed) + # shape = [in_shape[0], t= in_shape[1]/(patch_stride*t), C=1, patch_stride] + sp_dim_1 = 1 + if node.has_valid('patch_stride'): + channel_dim = 2 + sp_dim_2 = 3 + frame_height = node.patch_stride + else: + channel_dim = 3 + sp_dim_2 = 2 + frame_height = node.height_in - N, H = node_to_get_shape_value_of_indices(shape, [0]), node_to_get_shape_value_of_indices(shape, [1]) + i_shape = Shape(graph, {'name': node_name + '/Shape'}).create_node() + i_shape.in_port(0).connect(node.in_port(0).get_source()) + + N, H = node_to_get_shape_value_of_indices(i_shape, [0]), node_to_get_shape_value_of_indices(i_shape, [1]) div = create_op_with_const_inputs( - graph, Div, {1: float32_array([node.patch_stride])}, {'name': node_name + '/div_stride_h'}) + graph, Div, {1: int64_array([frame_height * node.kernel[1]])}, {'name': node_name + '/div_stride_h'}) div.in_port(0).connect(H.out_port(0)) - concat = create_op_with_const_inputs(graph, Concat, {2: float32_array([1]), 3: float32_array([node.patch_stride])}, + concat = create_op_with_const_inputs(graph, Concat, {sp_dim_2: int64_array([frame_height]), + channel_dim: int64_array([node.kernel[1]])}, {'name': node_name + '/concat_all_dims', 'in_ports_count': 4, 'axis': 0}) concat.in_port(0).connect(N.out_port(0)) - concat.in_port(1).connect(div.out_port(0)) - - reshape_pattern = Cast(graph, {'name': node_name + '/to_int', 'dst_type': np.int64}).create_node() - concat.out_port(0).connect(reshape_pattern.in_port(0)) + concat.in_port(sp_dim_1).connect(div.out_port(0)) reshape_in = Reshape(graph, {'name': node_name + '/reshape_in'}).create_node() - reshape_in.in_port(1).connect(reshape_pattern.out_port(0)) + reshape_in.in_port(1).connect(concat.out_port(0)) + + # change layout from NHWC to NCHW + # should be replaced by common Permute logic in future + transpose = None + if channel_dim == 3 and node.channel_dims == 1: + transpose = create_op_node_with_second_input(graph, Transpose, int64_array([0, 3, 1, 2]), + {'name': node.name + '/Transpose'}, reshape_in) # create Reshape after Convolution reshape_out = create_op_node_with_second_input(graph, Reshape, int64_array([0, -1]), @@ -78,7 +89,7 @@ class ReplaceConvolutionReshape(FrontReplacementPattern): # connect input_reshape_node source = node.in_port(0).get_source() - node.in_port(0).get_connection().set_source(reshape_in.out_port(0)) + node.in_port(0).get_connection().set_source(transpose.out_port(0) if transpose else reshape_in.out_port(0)) reshape_in.in_port(0).connect(source) # connect output_reshape_node node.out_port(0).get_connection().set_source(reshape_out.out_port(0)) diff --git a/model-optimizer/extensions/front/kaldi/replace_timeheightconvolution.py b/model-optimizer/extensions/front/kaldi/replace_timeheightconvolution.py new file mode 100644 index 00000000000..2e125c43364 --- /dev/null +++ b/model-optimizer/extensions/front/kaldi/replace_timeheightconvolution.py @@ -0,0 +1,103 @@ +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from mo.front.common.partial_infer.utils import int64_array +from mo.front.common.replacement import FrontReplacementPattern +from mo.graph.graph import Node, Graph, rename_node +from mo.ops.concat import Concat +from 
mo.ops.convolution import Convolution +from mo.ops.memoryoffset import MemoryOffset + + +class ReplaceTimeHeightConvolutionPattern(FrontReplacementPattern): + enabled = True + run_not_recursively = True + + def run_after(self): + from extensions.front.MoveEmbeddedInputsToInputs import MoveEmbeddedInputsToInputs + return [MoveEmbeddedInputsToInputs] + + def run_before(self): + from extensions.front.kaldi.add_permute_after_convolution import ReplaceConvolutionTranspose + from extensions.front.kaldi.add_reshape_around_convolution import ReplaceConvolutionReshape + from extensions.front.kaldi.memory_offset_adjustment import MemoryOffsetAdjustment + from extensions.front.kaldi.split_recurrent_memoryoffset import SplitRecurrentMemoryOffset + return [MemoryOffsetAdjustment, ReplaceConvolutionReshape, ReplaceConvolutionTranspose, + SplitRecurrentMemoryOffset] + + def find_and_replace_pattern(self, graph: Graph): + for node in graph.get_op_nodes(op='timeheightconvolutioncomponent'): + self.replace_timeheightconv(graph, node) + + def replace_timeheightconv(self, graph: Graph, node: Node): + req_time_offsets = node.soft_get('time_offsets') + offsets = node.soft_get("offsets", [[]]) + all_time_offsets = list(set(offsets[:, 0])) + all_time_offsets.sort() + in_name = node.soft_get('name', node.id) + rename_node(node, in_name + '/to_delete') + + # create memoryoffsets for context gathering + # we need concat if time offsets more than 1 + concat = Concat(graph, attrs={'name': in_name + '/Concat', + 'in_ports_count': len(all_time_offsets)}).create_node() + i = 0 + for t in all_time_offsets: + # if time offset included in required_time_offsets we don't need default value + has_default = t not in req_time_offsets + memoff = MemoryOffset(graph, attrs={'name': in_name + '/MemoryOffset_' + str(i), + 't': t, 'has_default': has_default, 'splitted': False, + 'pair_name': in_name + '/MemoryOffset_pair_' + str(i)}).create_node() + concat.in_port(i).connect(memoff.out_port(0)) + memoff.in_port(0).connect(node.in_port(0).get_source()) + i = i + 1 + + stride = node.soft_get("height_subsample", 1) + + kernel = int64_array([0, 0]) + kernel[0] = len(set(offsets[:, 0])) + kernel[1] = len(set(offsets[:, 1])) + + pad_h = int64_array([0, 0]) + pad_h[0] = -min(offsets[:, 1]) if min(offsets[:, 1]) < 0 else 0 + pad_h[1] = stride * node.height_out - (node.height_in - max([max(offsets[:, 1]), 0])) + + dilation_t = (max(offsets[:, 0]) - min(offsets[:, 0])) / (kernel[0] - 1) if kernel[0] > 1 else 1 + dilation_h = (max(offsets[:, 1]) - min(offsets[:, 1])) / (kernel[1] - 1) if kernel[0] > 1 else 1 + + conv_attrs = { + 'name': in_name, + 'output': node['out_channels'], + 'height_in': node.height_in, + 'bias_term': None, + 'pad': int64_array([[0, 0], [0, 0], [0, 0], pad_h]), + 'pad_spatial_shape': int64_array([[0, 0], pad_h]), + 'dilation': int64_array([1, 1, dilation_t, dilation_h]), + 'kernel': int64_array([node.out_channels, node.in_channels, kernel[0], kernel[1]]), + 'stride': int64_array([1, 1, 1, stride]), + 'kernel_spatial': kernel, + 'input_feature_channel': 1, + 'output_feature_channel': 0, + 'channel_dims': int64_array([1]), + 'spatial_dims': int64_array([2, 3]), + 'batch_dims': int64_array([0]), + 'kernel_spatial_idx': int64_array([2, 3]), + 'group': 1, + 'reshape_kernel': True, + 'bias_addable': True, + } + conv = Convolution(graph, attrs=conv_attrs).create_node() + conv.in_port(0).connect(concat.out_port(0)) + conv.in_port(1).connect(node.in_port(1).get_source()) + + # change layout for weights from OHWI to OIHW + # in 
future should be replaced by common Permute mechanics + weights = conv.in_port(1).get_source().node.value + weights = weights.reshape(int64_array([node.out_channels, -1, node.in_channels])) + weights = weights.transpose(int64_array([0, 2, 1])) + weights = weights.flatten() + conv.in_port(1).get_source().node.value = weights + + conv.in_port(2).connect(node.in_port(2).get_source()) + node.out_port(0).get_connection().set_source(conv.out_port(0)) + graph.remove_node(node.id) diff --git a/model-optimizer/extensions/middle/ReplaceMemoryOffsetWithSplice.py b/model-optimizer/extensions/middle/ReplaceMemoryOffsetWithSplice.py index 6a70e5337ed..f1172d78649 100644 --- a/model-optimizer/extensions/middle/ReplaceMemoryOffsetWithSplice.py +++ b/model-optimizer/extensions/middle/ReplaceMemoryOffsetWithSplice.py @@ -26,6 +26,10 @@ class ReplaceMemoryOffsetNodePattern(MiddleReplacementPattern): from extensions.middle.RemoveDuplicationMemory import RemoveMemoryDuplicationPattern return [RemoveMemoryDuplicationPattern] + def run_after(self): + from extensions.middle.split_tdnn_memoryoffset import SplitTdnnMemoryOffset + return [SplitTdnnMemoryOffset] + @staticmethod def pattern(): return dict( diff --git a/model-optimizer/mo/front/kaldi/extractors/batchnorm_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/batchnorm_component_ext.py index cc0fba3daf5..eac07ce8b5d 100644 --- a/model-optimizer/mo/front/kaldi/extractors/batchnorm_component_ext.py +++ b/model-optimizer/mo/front/kaldi/extractors/batchnorm_component_ext.py @@ -5,11 +5,9 @@ import numpy as np from mo.front.caffe.extractors.utils import embed_input from mo.front.extractor import FrontExtractorOp -from mo.front.kaldi.loader.utils import read_binary_bool_token, read_binary_integer32_token, collect_until_token, \ - read_binary_float_token +from mo.front.kaldi.loader.utils import collect_until_token, read_binary_float_token, read_binary_integer32_token from mo.front.kaldi.utils import read_binary_vector from mo.ops.scale_shift import ScaleShiftOp -from mo.utils.error import Error class BatchNormComponentFrontExtractor(FrontExtractorOp): @@ -26,18 +24,12 @@ class BatchNormComponentFrontExtractor(FrontExtractorOp): collect_until_token(pb, b'') block_dim = read_binary_integer32_token(pb) - if block_dim != dim: - raise Error("Dim is not equal BlockDim for BatchNorm is not supported") - collect_until_token(pb, b'') eps = read_binary_float_token(pb) collect_until_token(pb, b'') target_rms = read_binary_float_token(pb) - collect_until_token(pb, b'') - test_mode = read_binary_bool_token(pb) - collect_until_token(pb, b'') mean = read_binary_vector(pb) @@ -47,8 +39,13 @@ class BatchNormComponentFrontExtractor(FrontExtractorOp): scale = target_rms / np.sqrt(var + eps) shift = - target_rms * mean / np.sqrt(var + eps) - attrs = {'out-size': len(shift)} + + scale = np.tile(scale, dim // block_dim) + shift = np.tile(shift, dim // block_dim) + + attrs = {'out-size': dim} embed_input(attrs, 1, 'weights', scale) embed_input(attrs, 2, 'biases', shift) + ScaleShiftOp.update_node_stat(node, attrs) return cls.enabled diff --git a/model-optimizer/mo/front/kaldi/extractors/timeheightconvolution_ext.py b/model-optimizer/mo/front/kaldi/extractors/timeheightconvolution_ext.py new file mode 100644 index 00000000000..e47c1e4fa5c --- /dev/null +++ b/model-optimizer/mo/front/kaldi/extractors/timeheightconvolution_ext.py @@ -0,0 +1,62 @@ +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + +from 
mo.front.caffe.extractors.utils import embed_input +from mo.front.extractor import FrontExtractorOp +from mo.front.kaldi.loader.utils import collect_until_token, read_token_value +from mo.front.kaldi.utils import read_binary_matrix, read_binary_vector, read_binary_vector_of_pairs +from mo.ops.timeheightconvolution import TimeHeightConvolutionComponent + + +class TimeHeightConvolutionFrontExtractor(FrontExtractorOp): + op = 'timeheightconvolutioncomponent' + enabled = True + + @classmethod + def extract(cls, node): + pb = node.parameters + collect_until_token(pb, b'') + in_shape = read_token_value(pb, b'') + out_shape = read_token_value(pb, b'') + height_in = read_token_value(pb, b'') + height_out = read_token_value(pb, b'') + height_subsample = read_token_value(pb, b'') + collect_until_token(pb, b'') + offsets = read_binary_vector_of_pairs(pb, read_token=False, dtype=np.int32) + collect_until_token(pb, b'') + time_offsets = read_binary_vector(pb, read_token=False, dtype=np.int32) + collect_until_token(pb, b'') + weights, _ = read_binary_matrix(pb) + collect_until_token(pb, b'') + biases = read_binary_vector(pb) + + offsets = offsets.reshape([len(offsets)//2, 2]) + mapping_rule = { # stride for h axis + 'height_subsample': height_subsample, + # input dimension for h axis + 'height_in': height_in, + # output dimension for h axis + 'height_out': height_out, + # input dimension for channel axis + 'in_channels': in_shape, + # output dimension for channel axis + 'out_channels': out_shape, + # array with pairs like the following + # [ (-1, -1) (-1, 0) (-1, 1) + # (0, -1) (0, 0) (0, 1) + # (1, -1) (1, 0) (1, 1)] + # it means that kernel 3x3 will be applied to calculate current value of output + 'offsets': offsets, + # required time offsets to calculate current convolution + # time_offsets = [-1, 0, 1] for previous example means no padding for time axis and + # 3 values should be prepared + # time_offsets = [0] means zero padding [1, 1] for time axis + 'time_offsets': time_offsets, + 'out-size': out_shape * height_out} + + embed_input(mapping_rule, 1, 'weights', weights) + embed_input(mapping_rule, 2, 'biases', biases) + + TimeHeightConvolutionComponent.update_node_stat(node, mapping_rule) + return cls.enabled diff --git a/model-optimizer/mo/front/kaldi/loader/utils.py b/model-optimizer/mo/front/kaldi/loader/utils.py index 570d4726b6e..11f0cc14208 100644 --- a/model-optimizer/mo/front/kaldi/loader/utils.py +++ b/model-optimizer/mo/front/kaldi/loader/utils.py @@ -52,6 +52,7 @@ supported_components = [ 'sumgroupcomponent', 'tanhcomponent', 'tdnncomponent', + 'timeheightconvolutioncomponent', ] diff --git a/model-optimizer/mo/front/kaldi/utils.py b/model-optimizer/mo/front/kaldi/utils.py index bd64473fd4c..886921582d6 100644 --- a/model-optimizer/mo/front/kaldi/utils.py +++ b/model-optimizer/mo/front/kaldi/utils.py @@ -28,6 +28,13 @@ def read_binary_vector(file_desc: io.BufferedReader, read_token: bool = True, dt return read_blob(file_desc, elements_number, dtype) +def read_binary_vector_of_pairs(file_desc: io.BufferedReader, read_token: bool = True, dtype=np.float32): + if read_token: + read_placeholder(file_desc) + elements_number = read_binary_integer32_token(file_desc) + return read_blob(file_desc, 2 * elements_number, dtype) + + def read_learning_info(pb: io.BufferedReader): while True: read_placeholder(pb, 1) diff --git a/model-optimizer/mo/ops/timeheightconvolution.py b/model-optimizer/mo/ops/timeheightconvolution.py new file mode 100644 index 00000000000..a110172a647 --- /dev/null +++ 
b/model-optimizer/mo/ops/timeheightconvolution.py @@ -0,0 +1,19 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from mo.graph.graph import Graph +from mo.ops.op import Op + + +class TimeHeightConvolutionComponent(Op): + op = 'timeheightconvolutioncomponent' + enabled = False + + def __init__(self, graph: Graph, attrs: dict): + super().__init__(graph, { + 'type': None, + 'op': self.op, + 'infer': None, + 'in_ports_count': 1, + 'out_ports_count': 1, + }, attrs) diff --git a/model-optimizer/unit_tests/extensions/front/kaldi/replace_timeheightconvolution_test.py b/model-optimizer/unit_tests/extensions/front/kaldi/replace_timeheightconvolution_test.py new file mode 100644 index 00000000000..81ce42f5350 --- /dev/null +++ b/model-optimizer/unit_tests/extensions/front/kaldi/replace_timeheightconvolution_test.py @@ -0,0 +1,324 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +import numpy as np + +from extensions.front.kaldi.replace_timeheightconvolution import ReplaceTimeHeightConvolutionPattern +from mo.front.common.partial_infer.utils import int64_array +from mo.utils.ir_engine.compare_graphs import compare_graphs +from unit_tests.utils.graph import build_graph, regular_op, connect_front, const + + +class TimeheightconvolutionReplacerTest(unittest.TestCase): + nodes = { + **regular_op('placeholder', {}), + **regular_op('timeheightconv', {'op': 'timeheightconvolutioncomponent'}), + **const('weights', int64_array([])), + **const('biases', int64_array([])), + **regular_op('placeholder_out', {}), + + **regular_op('concat', {'type': 'Concat', 'axis': 1}), + **regular_op('memoryoffset_0', {'type': None, 'op': 'MemoryOffset', 't': -1, 'has_default': False}), + **regular_op('memoryoffset_1', {'type': None, 'op': 'MemoryOffset', 't': 0, 'has_default': False}), + **regular_op('memoryoffset_2', {'type': None, 'op': 'MemoryOffset', 't': 1, 'has_default': True}), + **regular_op('conv', {'op': 'Convolution', 'type': 'Convolution', 'output': 12, 'height_in': 80}), + } + + def test_timeheightconvolution_1offset(self): + graph = build_graph(self.nodes, [ + *connect_front('placeholder', '0:timeheightconv'), + *connect_front('weights', '1:timeheightconv'), + *connect_front('biases', '2:timeheightconv'), + *connect_front('timeheightconv', 'placeholder_out') + ], nodes_with_edges_only=True) + + graph.stage = 'front' + + conv = graph.nodes['timeheightconv'] + conv['height_subsample'] = 1 + conv['height_in'] = 80 + conv['height_out'] = 80 + conv['in_channels'] = 1 + conv['out_channels'] = 12 + conv['offsets'] = int64_array([[-1, -1], [-1, 0], [-1, 1]]) + conv['time_offsets'] = [-1] + graph.nodes['weights']['value'] = np.zeros([36]) + + ref_graph = build_graph(self.nodes, [ + *connect_front('placeholder', 'memoryoffset_0'), + *connect_front('memoryoffset_0', '0:concat'), + *connect_front('concat', '0:conv'), + *connect_front('weights', '1:conv'), + *connect_front('biases', '2:conv'), + *connect_front('conv', 'placeholder_out') + ], nodes_with_edges_only=True) + ref_graph.nodes['weights']['value'] = np.zeros([36]) + new_conv = ref_graph.nodes['conv'] + new_conv['pad'] = int64_array([[0, 0], [0, 0], [0, 0], [1, 1]]) + new_conv['dilation'] = int64_array([1, 1, 1, 1]) + new_conv['kernel'] = int64_array([12, 1, 1, 3]) + new_conv['stride'] = int64_array([1, 1, 1, 1]) + + + ReplaceTimeHeightConvolutionPattern().find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder_out', 
check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_timeheightconvolution_2_offsets(self): + graph = build_graph(self.nodes, [ + *connect_front('placeholder', '0:timeheightconv'), + *connect_front('weights', '1:timeheightconv'), + *connect_front('biases', '2:timeheightconv'), + *connect_front('timeheightconv', 'placeholder_out') + ], nodes_with_edges_only=True) + + graph.stage = 'front' + + conv = graph.nodes['timeheightconv'] + conv['height_subsample'] = 1 + conv['height_in'] = 80 + conv['height_out'] = 80 + conv['in_channels'] = 1 + conv['out_channels'] = 12 + conv['offsets'] = int64_array([[-1, -1], [-1, 0], [-1, 1], [0, -1], [0, 0], [0, 1]]) + conv['time_offsets'] = int64_array([-1, 0]) + graph.nodes['weights']['value'] = np.zeros([72]) + + ref_graph = build_graph(self.nodes, [ + *connect_front('placeholder', 'memoryoffset_0'), + *connect_front('placeholder', 'memoryoffset_1'), + *connect_front('memoryoffset_0', '0:concat'), + *connect_front('memoryoffset_1', '1:concat'), + *connect_front('concat', '0:conv'), + *connect_front('weights', '1:conv'), + *connect_front('biases', '2:conv'), + *connect_front('conv', 'placeholder_out') + ], nodes_with_edges_only=True) + ref_graph.nodes['weights']['value'] = np.zeros([72]) + new_conv = ref_graph.nodes['conv'] + new_conv['pad'] = int64_array([[0, 0], [0, 0], [0, 0], [1, 1]]) + new_conv['dilation'] = int64_array([1, 1, 1, 1]) + new_conv['kernel'] = int64_array([12, 1, 2, 3]) + new_conv['stride'] = int64_array([1, 1, 1, 1]) + + ReplaceTimeHeightConvolutionPattern().find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder_out', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_timeheightconvolution_2_offsets_def(self): + graph = build_graph(self.nodes, [ + *connect_front('placeholder', '0:timeheightconv'), + *connect_front('weights', '1:timeheightconv'), + *connect_front('biases', '2:timeheightconv'), + *connect_front('timeheightconv', 'placeholder_out') + ], nodes_with_edges_only=True) + + graph.stage = 'front' + + conv = graph.nodes['timeheightconv'] + conv['height_subsample'] = 1 + conv['height_in'] = 80 + conv['height_out'] = 80 + conv['in_channels'] = 1 + conv['out_channels'] = 12 + conv['offsets'] = int64_array([[0, -1], [0, 0], [0, 1], [1, -1], [1, 0], [1, 1]]) + conv['time_offsets'] = int64_array([0]) + graph.nodes['weights']['value'] = np.zeros([72]) + + ref_graph = build_graph(self.nodes, [ + *connect_front('placeholder', 'memoryoffset_1'), + *connect_front('placeholder', 'memoryoffset_2'), + *connect_front('memoryoffset_1', '0:concat'), + *connect_front('memoryoffset_2', '1:concat'), + *connect_front('concat', '0:conv'), + *connect_front('weights', '1:conv'), + *connect_front('biases', '2:conv'), + *connect_front('conv', 'placeholder_out') + ], nodes_with_edges_only=True) + ref_graph.nodes['weights']['value'] = np.zeros([72]) + new_conv = ref_graph.nodes['conv'] + new_conv['pad'] = int64_array([[0, 0], [0, 0], [0, 0], [1, 1]]) + new_conv['dilation'] = int64_array([1, 1, 1, 1]) + new_conv['kernel'] = int64_array([12, 1, 2, 3]) + new_conv['stride'] = int64_array([1, 1, 1, 1]) + + ReplaceTimeHeightConvolutionPattern().find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder_out', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_timeheightconvolution_2_offsets_dilation(self): + graph = build_graph(self.nodes, [ + *connect_front('placeholder', '0:timeheightconv'), + *connect_front('weights', '1:timeheightconv'), + 
*connect_front('biases', '2:timeheightconv'), + *connect_front('timeheightconv', 'placeholder_out') + ], nodes_with_edges_only=True) + + graph.stage = 'front' + + conv = graph.nodes['timeheightconv'] + conv['height_subsample'] = 1 + conv['height_in'] = 80 + conv['height_out'] = 80 + conv['in_channels'] = 1 + conv['out_channels'] = 12 + conv['offsets'] = int64_array([[-1, -3], [-1, 0], [-1, 3], [1, -3], [1, 0], [1, 3]]) + conv['time_offsets'] = int64_array([-1]) + graph.nodes['weights']['value'] = np.zeros([72]) + + ref_graph = build_graph(self.nodes, [ + *connect_front('placeholder', 'memoryoffset_0'), + *connect_front('placeholder', 'memoryoffset_2'), + *connect_front('memoryoffset_0', '0:concat'), + *connect_front('memoryoffset_2', '1:concat'), + *connect_front('concat', '0:conv'), + *connect_front('weights', '1:conv'), + *connect_front('biases', '2:conv'), + *connect_front('conv', 'placeholder_out') + ], nodes_with_edges_only=True) + ref_graph.nodes['weights']['value'] = np.zeros([72]) + new_conv = ref_graph.nodes['conv'] + new_conv['pad'] = int64_array([[0, 0], [0, 0], [0, 0], [3, 3]]) + new_conv['dilation'] = int64_array([1, 1, 2, 3]) + new_conv['kernel'] = int64_array([12, 1, 2, 3]) + new_conv['stride'] = int64_array([1, 1, 1, 1]) + + ReplaceTimeHeightConvolutionPattern().find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder_out', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_timeheightconvolution_2_offsets_pad(self): + graph = build_graph(self.nodes, [ + *connect_front('placeholder', '0:timeheightconv'), + *connect_front('weights', '1:timeheightconv'), + *connect_front('biases', '2:timeheightconv'), + *connect_front('timeheightconv', 'placeholder_out') + ], nodes_with_edges_only=True) + + graph.stage = 'front' + conv = graph.nodes['timeheightconv'] + conv['height_subsample'] = 1 + conv['height_in'] = 80 + conv['height_out'] = 74 + conv['in_channels'] = 1 + conv['out_channels'] = 12 + conv['offsets'] = int64_array([[-1, 0], [-1, 3], [-1, 6], [1, 0], [1, 3], [1, 6]]) + conv['time_offsets'] = int64_array([-1]) + graph.nodes['weights']['value'] = np.zeros([72]) + + ref_graph = build_graph(self.nodes, [ + *connect_front('placeholder', 'memoryoffset_0'), + *connect_front('placeholder', 'memoryoffset_2'), + *connect_front('memoryoffset_0', '0:concat'), + *connect_front('memoryoffset_2', '1:concat'), + *connect_front('concat', '0:conv'), + *connect_front('weights', '1:conv'), + *connect_front('biases', '2:conv'), + *connect_front('conv', 'placeholder_out') + ], nodes_with_edges_only=True) + ref_graph.nodes['weights']['value'] = np.zeros([72]) + new_conv = ref_graph.nodes['conv'] + new_conv['pad'] = int64_array([[0, 0], [0, 0], [0, 0], [0, 0]]) + new_conv['dilation'] = int64_array([1, 1, 2, 3]) + new_conv['kernel'] = int64_array([12, 1, 2, 3]) + new_conv['stride'] = int64_array([1, 1, 1, 1]) + + ReplaceTimeHeightConvolutionPattern().find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder_out', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_timeheightconvolution_out_channels(self): + graph = build_graph(self.nodes, [ + *connect_front('placeholder', '0:timeheightconv'), + *connect_front('weights', '1:timeheightconv'), + *connect_front('biases', '2:timeheightconv'), + *connect_front('timeheightconv', 'placeholder_out') + ], nodes_with_edges_only=True) + + graph.stage = 'front' + conv = graph.nodes['timeheightconv'] + conv['height_subsample'] = 1 + conv['height_in'] = 80 + 
conv['height_out'] = 74 + conv['in_channels'] = 3 + conv['out_channels'] = 4 + conv['offsets'] = int64_array([[-1, 0], [-1, 3], [-1, 6], [1, 0], [1, 3], [1, 6]]) + conv['time_offsets'] = int64_array([-1]) + graph.nodes['weights']['value'] = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72]) + + ref_graph = build_graph(self.nodes, [ + *connect_front('placeholder', 'memoryoffset_0'), + *connect_front('placeholder', 'memoryoffset_2'), + *connect_front('memoryoffset_0', '0:concat'), + *connect_front('memoryoffset_2', '1:concat'), + *connect_front('concat', '0:conv'), + *connect_front('weights', '1:conv'), + *connect_front('biases', '2:conv'), + *connect_front('conv', 'placeholder_out') + ], nodes_with_edges_only=True) + ref_graph.nodes['weights']['value'] = np.array([1, 4, 7, 10, 13, 16, 2, 5, 8, 11, 14, 17, 3, 6, 9, 12, 15, 18, + 19, 22, 25, 28, 31, 34, 20, 23, 26, 29, 32, 35, 21, 24, 27, 30, 33, 36, + 37, 40, 43, 46, 49, 52, 38, 41, 44, 47, 50, 53, 39, 42, 45, 48, 51, 54, + 55, 58, 61, 64, 67, 70, 56, 59, 62, 65, 68, 71, 57, 60, 63, 66, 69, 72]) + new_conv = ref_graph.nodes['conv'] + new_conv['output'] = 4 + new_conv['pad'] = int64_array([[0, 0], [0, 0], [0, 0], [0, 0]]) + new_conv['dilation'] = int64_array([1, 1, 2, 3]) + new_conv['kernel'] = int64_array([4, 3, 2, 3]) + new_conv['stride'] = int64_array([1, 1, 1, 1]) + + ReplaceTimeHeightConvolutionPattern().find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder_out', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_timeheightconvolution_2_offsets_stride(self): + graph = build_graph(self.nodes, [ + *connect_front('placeholder', '0:timeheightconv'), + *connect_front('weights', '1:timeheightconv'), + *connect_front('biases', '2:timeheightconv'), + *connect_front('timeheightconv', 'placeholder_out') + ], nodes_with_edges_only=True) + + graph.stage = 'front' + conv = graph.nodes['timeheightconv'] + conv['height_subsample'] = 2 + conv['height_in'] = 80 + conv['height_out'] = 37 + conv['in_channels'] = 1 + conv['out_channels'] = 12 + conv['offsets'] = int64_array([[-1, 0], [-1, 3], [-1, 6], [1, 0], [1, 3], [1, 6]]) + conv['time_offsets'] = int64_array([-1]) + graph.nodes['weights']['value'] = np.zeros([72]) + + ref_graph = build_graph(self.nodes, [ + *connect_front('placeholder', 'memoryoffset_0'), + *connect_front('placeholder', 'memoryoffset_2'), + *connect_front('memoryoffset_0', '0:concat'), + *connect_front('memoryoffset_2', '1:concat'), + *connect_front('concat', '0:conv'), + *connect_front('weights', '1:conv'), + *connect_front('biases', '2:conv'), + *connect_front('conv', 'placeholder_out') + ], nodes_with_edges_only=True) + ref_graph.nodes['weights']['value'] = np.zeros([72]) + new_conv = ref_graph.nodes['conv'] + new_conv['pad'] = int64_array([[0, 0], [0, 0], [0, 0], [0, 0]]) + new_conv['dilation'] = int64_array([1, 1, 2, 3]) + new_conv['kernel'] = int64_array([12, 1, 2, 3]) + new_conv['stride'] = int64_array([1, 1, 1, 2]) + + ReplaceTimeHeightConvolutionPattern().find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder_out', check_op_attrs=True) + self.assertTrue(flag, resp) diff --git a/model-optimizer/unit_tests/mo/front/kaldi/extractors/convolutional_component_ext_test.py 
b/model-optimizer/unit_tests/mo/front/kaldi/extractors/convolutional_component_ext_test.py index ea236da6cb8..fbcdea39794 100644 --- a/model-optimizer/unit_tests/mo/front/kaldi/extractors/convolutional_component_ext_test.py +++ b/model-optimizer/unit_tests/mo/front/kaldi/extractors/convolutional_component_ext_test.py @@ -22,7 +22,7 @@ class ConvolutionalComponentFrontExtractorTest(KaldiFrontExtractorTest): pb += KaldiFrontExtractorTest.write_tag_with_value('', 4) pb += KaldiFrontExtractorTest.generate_learn_info() pb += b' ' - pb += KaldiFrontExtractorTest.generate_matrix([2, 1]) + pb += KaldiFrontExtractorTest.generate_matrix([2, 4]) pb += b' ' pb += KaldiFrontExtractorTest.generate_vector(2) cls.test_node['parameters'] = TestKaldiUtilsLoading.bytesio_from(pb) @@ -50,6 +50,6 @@ class ConvolutionalComponentFrontExtractorTest(KaldiFrontExtractorTest): self.assertEqual(self.test_node[attr], val_attrs[attr]) def test_convolution_blobs(self): - self.assertTrue(np.array_equal(self.test_node.weights, [0, 1])) + self.assertTrue(np.array_equal(self.test_node.weights, [0, 1, 2, 3, 4, 5, 6, 7])) self.assertTrue(np.array_equal(self.test_node.biases, [0, 1])) From 68ed12cb98ede913ee231ed0d1b2fce139e305a8 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Thu, 29 Apr 2021 14:44:12 +0300 Subject: [PATCH 13/73] Enable `FetchContent` for `gflags` dependency in stress tests (#5449) * Enable `FetchContent` for `gflags` dependency in stress tests * Add `HAVE_SYS_STAT_H`, `HAVE_INTTYPES_H`, `INTTYPES_FORMAT` for stress tests --- tests/stress_tests/CMakeLists.txt | 3 +++ tests/stress_tests/memcheck_tests/CMakeLists.txt | 12 +++++++++++- tests/stress_tests/memleaks_tests/CMakeLists.txt | 12 +++++++++++- tests/stress_tests/unittests/CMakeLists.txt | 12 +++++++++++- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/tests/stress_tests/CMakeLists.txt b/tests/stress_tests/CMakeLists.txt index 5b5e7b7adf6..5a37fd9a711 100644 --- a/tests/stress_tests/CMakeLists.txt +++ b/tests/stress_tests/CMakeLists.txt @@ -9,6 +9,9 @@ if (CMAKE_BUILD_TYPE STREQUAL "") set(CMAKE_BUILD_TYPE "Release") endif() +set (HAVE_SYS_STAT_H 1) +set (HAVE_INTTYPES_H 1) +set (INTTYPES_FORMAT C99) find_package(InferenceEngineDeveloperPackage REQUIRED) add_subdirectory(unittests) diff --git a/tests/stress_tests/memcheck_tests/CMakeLists.txt b/tests/stress_tests/memcheck_tests/CMakeLists.txt index 8c52cb306b1..635797c58e7 100644 --- a/tests/stress_tests/memcheck_tests/CMakeLists.txt +++ b/tests/stress_tests/memcheck_tests/CMakeLists.txt @@ -19,7 +19,17 @@ file (GLOB HDR # Create library file from sources. add_executable(${TARGET_NAME} ${HDR} ${SRC}) -find_package(gflags REQUIRED) +include(FetchContent) +FetchContent_Declare( + gflags + GIT_REPOSITORY "https://github.com/gflags/gflags.git" + GIT_TAG "v2.2.2" +) +FetchContent_GetProperties(gflags) +if(NOT gflags_POPULATED) + FetchContent_Populate(gflags) + add_subdirectory(${gflags_SOURCE_DIR} ${gflags_BINARY_DIR}) +endif() target_link_libraries(${TARGET_NAME} IE::gtest diff --git a/tests/stress_tests/memleaks_tests/CMakeLists.txt b/tests/stress_tests/memleaks_tests/CMakeLists.txt index e8d0915136c..c24e43433d2 100644 --- a/tests/stress_tests/memleaks_tests/CMakeLists.txt +++ b/tests/stress_tests/memleaks_tests/CMakeLists.txt @@ -20,7 +20,17 @@ file (GLOB HDR # Create library file from sources. 
add_executable(${TARGET_NAME} ${HDR} ${SRC}) -find_package(gflags REQUIRED) +include(FetchContent) +FetchContent_Declare( + gflags + GIT_REPOSITORY "https://github.com/gflags/gflags.git" + GIT_TAG "v2.2.2" +) +FetchContent_GetProperties(gflags) +if(NOT gflags_POPULATED) + FetchContent_Populate(gflags) + add_subdirectory(${gflags_SOURCE_DIR} ${gflags_BINARY_DIR}) +endif() target_link_libraries(${TARGET_NAME} IE::gtest diff --git a/tests/stress_tests/unittests/CMakeLists.txt b/tests/stress_tests/unittests/CMakeLists.txt index d642a96696d..fea767898f7 100644 --- a/tests/stress_tests/unittests/CMakeLists.txt +++ b/tests/stress_tests/unittests/CMakeLists.txt @@ -20,7 +20,17 @@ file (GLOB HDR # Create library file from sources. add_executable(${TARGET_NAME} ${HDR} ${SRC}) -find_package(gflags REQUIRED) +include(FetchContent) +FetchContent_Declare( + gflags + GIT_REPOSITORY "https://github.com/gflags/gflags.git" + GIT_TAG "v2.2.2" +) +FetchContent_GetProperties(gflags) +if(NOT gflags_POPULATED) + FetchContent_Populate(gflags) + add_subdirectory(${gflags_SOURCE_DIR} ${gflags_BINARY_DIR}) +endif() target_link_libraries(${TARGET_NAME} IE::gtest From 19afae3638aab3e6c6dc65535bb66b1d48513866 Mon Sep 17 00:00:00 2001 From: Vladimir Zinoviev Date: Thu, 29 Apr 2021 18:24:21 +0300 Subject: [PATCH 14/73] [LPT] INT4 FakeQuantize not transform (#5082) --- .../fake_quantize_transformation.cpp | 29 ++++++++++-- .../fake_quantize_transformation.cpp | 28 ++++++++++-- .../fake_quantize_transformation.hpp | 12 +++-- .../fake_quantize_transformation.cpp | 44 +++++++------------ .../fake_quantize_function.hpp | 5 +++ .../src/fake_quantize_function.cpp | 25 +++++++++++ 6 files changed, 105 insertions(+), 38 deletions(-) diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp index 6ab6f4e23eb..2f856a61cd7 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp @@ -25,10 +25,31 @@ const std::vector trasformationParamValues = { LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8() }; -const std::vector fakeQuantizeOnDataValues = { - { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, - { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, - { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } }, +const std::vector fakeQuantizeOnDataValues = { + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + "Pooling", "U8" + }, + { + { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, + "Pooling", "U8" + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { -1.28f }, { 1.27f } }, + "Pooling", "I8" + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } }, + "Pooling", "U8" + }, + { + { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } }, + "Pooling", "FP32" + }, + { + { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } }, + "Pooling", "FP32" + }, // nGraph: I8->FP32 Convert is not supported // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } }, // { 256ul, { 1ul }, { -1.28f} , { 1.27f } } diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp 
b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp index 1a00abb7f03..35f047794da 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp @@ -25,9 +25,31 @@ const std::vector trasformationParamValues = { LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8() }; -const std::vector fakeQuantizeOnDataValues = { - { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, - { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, +const std::vector fakeQuantizeOnDataValues = { + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + "Pooling", "U8" + }, + { + { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, + "Pooling", "U8" + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { -1.28f }, { 1.27f } }, + "Pooling", "I8" + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } }, + "Pooling", "U8" + }, + { + { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } }, + "Pooling", "FP32" + }, + { + { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } }, + "Pooling", "FP32" + }, // nGraph: I8->FP32 Convert is not supported // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } }, // { 256ul, { 1ul }, { -1.28f} , { 1.27f } } diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp index aef99adf002..f2b82386c5e 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/fake_quantize_transformation.hpp @@ -10,13 +10,20 @@ #include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp" namespace LayerTestsDefinitions { +class FakeQuantizeTransformationParam { +public: + ngraph::builder::subgraph::FakeQuantizeOnData fakequantize; + + std::string layerName; + std::string expectedKernelType; +}; typedef std::tuple< ngraph::element::Type, ngraph::Shape, std::string, ngraph::pass::low_precision::LayerTransformation::Params, - ngraph::builder::subgraph::FakeQuantizeOnData> FakeQuantizeTransformationParams; + FakeQuantizeTransformationParam> FakeQuantizeTransformationParams; class FakeQuantizeTransformation : public testing::WithParamInterface, @@ -27,8 +34,7 @@ public: protected: void SetUp() override; -private: - void validate(); + void Run() override; }; } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp index bd65adae44b..4f14e33a757 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/fake_quantize_transformation.cpp @@ -22,11 +22,11 @@ std::string FakeQuantizeTransformation::getTestCaseName(testing::TestParamInfoGetParam(); + FakeQuantizeTransformationParam testParams; + std::tie(netPrecision, inputShape, 
targetDevice, params, testParams) = this->GetParam(); - function = ngraph::builder::subgraph::FakeQuantizeFunction::getOriginal( + function = ngraph::builder::subgraph::FakeQuantizeFunction::getOriginalWithMaxPool( netPrecision, inputShape, - fakeQuantizeOnData); - - ngraph::pass::InitNodeInfo().run_on_function(function); - validate(); + testParams.fakequantize); } -void FakeQuantizeTransformation::validate() { - ngraph::element::Type precision; - ngraph::Shape inputShapes; - std::string targetDevice; - ngraph::pass::low_precision::LayerTransformation::Params params; - ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData; - std::tie(precision, inputShapes, targetDevice, params, fakeQuantizeOnData) = this->GetParam(); +void FakeQuantizeTransformation::Run() { + LayerTestsCommon::Run(); - auto transformations = getLowPrecisionTransformationsNGraph(params); - transformations.removeStandaloneCleanup(); - transformations.removeStandaloneCleanup(); - - const auto transformed = transformNGraph(params, transformations); - EXPECT_EQ(1ul, transformed->get_output_size()); - - const auto output = transformed->get_output_op(0); - const auto scaleShift = output->get_input_node_shared_ptr(0); - const std::string typeName = scaleShift->get_type_name(); - ASSERT_EQ("ScaleShiftIE", typeName); + const auto params = std::get<4>(GetParam()); + const auto actualPrecision = getRuntimePrecisionByType(params.layerName); + auto expectedPrecision = params.expectedKernelType; + if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) { + expectedPrecision = "FP16"; + } + EXPECT_EQ(actualPrecision, expectedPrecision); } TEST_P(FakeQuantizeTransformation, CompareWithRefImpl) { diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_function.hpp index c0a7da296be..92dbdc1df53 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fake_quantize_function.hpp @@ -23,6 +23,11 @@ public: const ngraph::Shape& inputShape, const FakeQuantizeOnData& fakeQuantizeOnData); + static std::shared_ptr getOriginalWithMaxPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fakeQuantizeOnData); + static std::shared_ptr getReference( const ngraph::element::Type precision, const ngraph::Shape& inputShape, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_function.cpp index 050dae69841..f9b802fad2d 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/fake_quantize_function.cpp @@ -20,6 +20,31 @@ namespace subgraph { using namespace ngraph::pass; +std::shared_ptr FakeQuantizeFunction::getOriginalWithMaxPool( + const ngraph::element::Type precision, + const ngraph::Shape& inputShape, + const FakeQuantizeOnData& fakeQuantizeOnData) { + const auto input = std::make_shared(precision, ngraph::Shape(inputShape)); + input->set_friendly_name("input"); + + const auto fakeQuantize = ngraph::builder::makeFakeQuantize( + input, element::f32, 
fakeQuantizeOnData.quantizationLevel, fakeQuantizeOnData.constantShape, + fakeQuantizeOnData.inputLowValues, fakeQuantizeOnData.inputHighValues, fakeQuantizeOnData.outputLowValues, fakeQuantizeOnData.outputHighValues); + const auto maxPool = std::make_shared( + fakeQuantize, + Strides{ 1, 1 }, + Shape{ 1, 1 }, + Shape{ 0, 0 }, + Shape{ 2, 2 }); + + fakeQuantize->set_friendly_name("fakeQuantize"); + auto& rtInfo = fakeQuantize->get_rt_info(); + rtInfo["Variant::std::string"] = std::make_shared>("fakeQuantize"); + + ngraph::ResultVector results{ std::make_shared(maxPool) }; + return std::make_shared(results, ngraph::ParameterVector{ input }, "FakeQuantizeFunction"); +} + std::shared_ptr FakeQuantizeFunction::getOriginal( const ngraph::element::Type precision, const ngraph::Shape& inputShape, From 05dc0c8cf74d975e18099524664b082ac24fad9b Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Fri, 30 Apr 2021 00:27:05 +0900 Subject: [PATCH 15/73] [IE CLDNN] WA for memory increase problem of parallel build for OCL (#5389) In Linux, without malloc_trim, some freed memory is not being returned to the system. The current hypothesis is that a large allocation for compilation is not completely freed, though it is mostly freed. This does not happen in Windows. So, added malloc_trim for the Linux build until we figure out a better solution. --- .../clDNN/src/gpu/kernels_cache.cpp | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp b/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp index 7631afc0ccb..1451d68de5f 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp @@ -24,6 +24,9 @@ #include #include #endif +#if defined(__unix__) && !defined(__ANDROID__) +#include +#endif #ifndef ENABLE_UNICODE_PATH_SUPPORT # ifdef _WIN32 @@ -237,14 +240,6 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, } kernels_cache::kernels_cache(gpu_toolkit& context, uint32_t prog_id) : _context(context), _prog_id(prog_id) { -#if (CLDNN_THREADING == CLDNN_THREADING_TBB) - int n_threads = _context.get_configuration().n_threads; - arena = std::unique_ptr(new tbb::task_arena()); - arena->initialize(n_threads); -#elif(CLDNN_THREADING == CLDNN_THREADING_THREADPOOL) - int n_threads = _context.get_configuration().n_threads; - pool = std::unique_ptr(new thread_pool(n_threads)); -#endif } kernels_cache::kernel_id kernels_cache::set_kernel_source( @@ -406,6 +401,14 @@ void kernels_cache::build_all() { std::lock_guard lock(_context.get_cache_mutex()); get_program_source(_kernels_code, &batches); _one_time_kernels.clear(); +#if (CLDNN_THREADING == CLDNN_THREADING_TBB) + int n_threads = _context.get_configuration().n_threads; + arena = std::unique_ptr(new tbb::task_arena()); + arena->initialize(n_threads); +#elif(CLDNN_THREADING == CLDNN_THREADING_THREADPOOL) + int n_threads = _context.get_configuration().n_threads; + pool = std::unique_ptr(new thread_pool(n_threads)); +#endif } #if (CLDNN_THREADING == CLDNN_THREADING_TBB) @@ -435,6 +438,22 @@ void kernels_cache::build_all() { std::lock_guard lock(_context.get_cache_mutex()); _kernels_code.clear(); _pending_compilation = false; +#if (CLDNN_THREADING == CLDNN_THREADING_TBB) + arena.reset(); +#if defined(__unix__) && !defined(__ANDROID__) + // NOTE: In Linux, without malloc_trim, an amount of the memory used by compilation is not being returned to the system even though it is freed.
+ // (It is at least 500 MB when we perform parallel compilation) + // It is observed that freeing the memory manually with malloc_trim saves significant amount of the memory. + // Also, this is not happening in Windows. + // So, added malloc_trim for linux build until we figure out a better solution. + malloc_trim(0); +#endif +#elif(CLDNN_THREADING == CLDNN_THREADING_THREADPOOL) + pool.reset(); +#if defined(__unix__) && !defined(__ANDROID__) + malloc_trim(0); +#endif +#endif } } From 2062a648a74af5128ac0c8c12a2dc158d11b552e Mon Sep 17 00:00:00 2001 From: Gabriele Galiero Casay Date: Thu, 29 Apr 2021 18:21:45 +0200 Subject: [PATCH 16/73] Gelu-7 specification refactoring (#5439) * Review spec of Gelu-7 operation * Address review comments * Modified formulas * Changed type from `T` to *T* --- docs/ops/activation/GELU_7.md | 60 +++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/docs/ops/activation/GELU_7.md b/docs/ops/activation/GELU_7.md index 1619894d173..b89e80bff7b 100644 --- a/docs/ops/activation/GELU_7.md +++ b/docs/ops/activation/GELU_7.md @@ -2,53 +2,63 @@ **Versioned name**: *Gelu-7* -**Category**: *Activation* +**Category**: *Activation function* -**Short description**: Calculates Gaussian error linear. +**Short description**: Gaussian error linear unit element-wise activation function. -**Detailed description**: `Gelu(x) = x * Φ(x)`, where `Φ(x)` is the Cumulative Distribution Function for Gaussian Distribution. -The Gelu operation is introduced in the [paper](https://arxiv.org/abs/1606.08415). +**Detailed description**: + +*Gelu* operation is introduced in this [article](https://arxiv.org/abs/1606.08415). +It performs element-wise activation function on a given input tensor, based on the following mathematical formula: + +\f[ + Gelu(x) = x\cdot\Phi(x) +\f] + +where `Φ(x)` is the Cumulative Distribution Function for Gaussian Distribution. + +The *Gelu* function may be approximated in two different ways based on *approximation_mode* attribute. + +For `erf` approximation mode, *Gelu* function is represented as: + +\f[ + Gelu(x) = x\cdot\Phi(x) = x\cdot\frac{1}{2}\cdot\left[1 + erf\left(x/\sqrt{2}\right)\right] +\f] + +For `tanh` approximation mode, *Gelu* function is represented as: + +\f[ + Gelu(x) \approx x\cdot\frac{1}{2}\cdot \left(1 + \tanh\left[\sqrt{2/\pi} \cdot (x + 0.044715 \cdot x^3)\right]\right) +\f] **Attributes** * *approximation_mode* - * **Description**: Specifies the formulae to calculate the output. + * **Description**: Specifies the formulae to calculate the *Gelu* function. * **Range of values**: - * `erf` -- calculate output using the Gauss error function. - * `tanh` -- calculate output using tanh approximation + * `erf` - calculate output using the Gauss error function + * `tanh` - calculate output using tanh approximation * **Type**: `string` * **Default value**: `erf` * **Required**: *no* - -**Mathematical Formulation** - -For the `erf` approximation mode: -\f[ - Gelu(x) = 0.5 \cdot x \cdot (1.0 + erf((x) / \sqrt{2}) -\f] - -For the `tanh` approximation mode: - -\f[ - Gelu(x) \approx 0.5 \cdot x \cdot (1.0 + tanh(\sqrt{2.0/pi} \cdot (x + 0.044715 \cdot x ^ 3)) -\f] - **Inputs**: -* **1**: Multidimensional input tensor of type *T*. Required. +* **1**: A tensor of type *T* and arbitrary shape. **Required**. **Outputs**: -* **1**: Floating point tensor with shape and type *T* matching the input tensor. +* **1**: The result of element-wise *Gelu* function applied to the input tensor. 
A tensor of type *T* and the same shape as input tensor. **Types** -* *T*: any floating point type. +* *T*: arbitrary supported floating-point type. **Examples** +*Example: `tanh` approximation mode* + ```xml @@ -67,6 +77,8 @@ For the `tanh` approximation mode: ``` +*Example: `erf` approximation mode* + ```xml From c350f61a426c3abcafb5820c2bba38f2ffb0a18f Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 29 Apr 2021 19:50:46 +0300 Subject: [PATCH 17/73] Move all base wrapper classes from Plugin API to source folder (#5419) * Small refactoring in TEMPLATE plugin * Fixed compilation on Windows * Fixed code style * Hide CALL_STATUS_FNC helpers to private API * Moved some base classes to private place from plugin_api * Updates for VariableState creation * Take Jane's changes for Demension names * Revert "Take Jane's changes for Demension names" This reverts commit 9f6c8fa5a6460a9b70df15f2a201977a8b38aedd. * Removed ICNNNetwork include * removed more icnnnetwork includes * Added missed include with ie_input_info.hpp * Fixed GNA plugin to provide names w/o \0 --- docs/template_plugin/src/template_config.cpp | 1 - inference-engine/include/cpp/ie_cnn_network.h | 15 +--- .../include/cpp/ie_memory_state.hpp | 34 +++++----- inference-engine/include/ie_common.h | 33 --------- .../src/cldnn_engine/cldnn_common_utils.h | 1 - .../src/cldnn_engine/cldnn_config.cpp | 1 - .../src/gna_plugin/gna_executable_network.hpp | 2 - .../src/gna_plugin/gna_model_serial.cpp | 8 +-- .../src/gna_plugin/gna_model_serial.hpp | 2 - .../src/gna_plugin/gna_plugin.cpp | 1 - .../src/gna_plugin/layers/gna_layer_type.cpp | 1 - .../src/gna_plugin/layers/gna_layer_type.hpp | 1 - .../inference_engine/compilation_context.cpp | 1 - .../cpp}/exception2status.hpp | 17 +++++ .../inference_engine/cpp/ie_cnn_network.cpp | 10 ++- .../cpp/ie_executable_network.cpp | 10 +-- .../cpp}/ie_executable_network_base.hpp | 6 +- .../cpp}/ie_infer_async_request_base.hpp | 4 +- .../inference_engine/cpp/ie_infer_request.cpp | 13 ++-- .../cpp/ie_variable_state.cpp | 36 ++++++---- .../cpp}/ie_variable_state_base.hpp | 2 +- .../src/inference_engine/ie_common.cpp | 1 - .../src/inference_engine/ie_core.cpp | 1 - .../src/inference_engine/ie_plugin_cpp.hpp | 50 +++++++------- .../include/legacy/graph_transformer.h | 1 - .../legacy_api/src/network_serializer_v7.hpp | 1 - .../multi_device_exec_network.cpp | 1 - .../multi_device_infer_request.cpp | 1 - ...executable_network_thread_safe_default.hpp | 1 - ...nfer_async_request_thread_safe_default.hpp | 1 - .../impl/ie_plugin_internal.hpp | 1 - .../interface/ie_iinfer_request_internal.hpp | 1 - .../interface/ie_iplugin_internal.hpp | 1 - .../src/readers/ir_reader/ie_ir_reader.hpp | 1 - .../src/readers/ir_reader_v7/parsers.h | 2 - .../src/vpu/common/src/parsed_config_base.cpp | 1 - .../include/vpu/blob_reader.hpp | 1 - .../include/vpu/graph_transformer.hpp | 1 - .../include/vpu/model/model.hpp | 2 - .../graph_transformer/src/parsed_config.cpp | 1 - .../src/vpu/myriad_plugin/myriad_config.cpp | 2 - .../async_infer_request_test.cpp | 1 - .../include/behavior/core_integration.hpp | 1 - .../shared/include/behavior/infer_request.hpp | 1 - .../include/behavior/infer_request_input.hpp | 1 - .../include/behavior/infer_request_output.hpp | 1 - .../common_test_utils/test_assertions.hpp | 2 +- .../mock_iexecutable_network_internal.hpp | 1 - .../mocks/mock_engine/mock_plugin.cpp | 1 - .../mocks/mock_engine/mock_plugin.hpp | 1 - .../cpp_interfaces}/exception_test.cpp | 2 +- .../ie_executable_network_base_test.cpp | 68 
------------------- .../ie_infer_async_request_base_test.cpp | 3 +- ...async_request_thread_safe_default_test.cpp | 2 +- .../ie_memory_state_internal_test.cpp | 4 +- .../cpp_interfaces/ie_plugin_test.cpp | 2 +- .../inference_engine/ie_exception_test.cpp | 2 - .../ie_executable_network_test.cpp | 1 + .../plugin_tests/behavior_test_plugin.h | 1 - .../include/object_detection_matcher.hpp | 1 - .../tests_deprecated/helpers/tests_common.hpp | 9 --- .../unit/engines/gna/gna_matcher.hpp | 1 - .../layers/internal/graph_leaks_test.cpp | 4 -- 63 files changed, 122 insertions(+), 259 deletions(-) rename inference-engine/src/{plugin_api/cpp_interfaces => inference_engine/cpp}/exception2status.hpp (71%) rename inference-engine/src/{plugin_api/cpp_interfaces/base => inference_engine/cpp}/ie_executable_network_base.hpp (95%) rename inference-engine/src/{plugin_api/cpp_interfaces/base => inference_engine/cpp}/ie_infer_async_request_base.hpp (98%) rename inference-engine/src/{plugin_api/cpp_interfaces/base => inference_engine/cpp}/ie_variable_state_base.hpp (97%) rename inference-engine/tests/{functional/inference_engine => unit/inference_engine/cpp_interfaces}/exception_test.cpp (98%) delete mode 100644 inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_executable_network_base_test.cpp diff --git a/docs/template_plugin/src/template_config.cpp b/docs/template_plugin/src/template_config.cpp index b713cfa98ea..ec75893462b 100644 --- a/docs/template_plugin/src/template_config.cpp +++ b/docs/template_plugin/src/template_config.cpp @@ -4,7 +4,6 @@ #include #include -#include #include "template_config.hpp" #include "template/template_config.hpp" diff --git a/inference-engine/include/cpp/ie_cnn_network.h b/inference-engine/include/cpp/ie_cnn_network.h index 476b78604f0..9e9c41e298d 100644 --- a/inference-engine/include/cpp/ie_cnn_network.h +++ b/inference-engine/include/cpp/ie_cnn_network.h @@ -190,18 +190,12 @@ public: /** * @brief Method maps framework tensor name to OpenVINO name - * * @param orig_name Framework tensor name - * * @return OpenVINO name */ - std::string getOVNameForTensor(const std::string& orig_name) const { - std::string ov_name; - CALL_STATUS_FNC(getOVNameForTensor, ov_name, orig_name); - return ov_name; - } + std::string getOVNameForTensor(const std::string& orig_name) const; -protected: +private: IE_SUPPRESS_DEPRECATED_START /** * @brief Network extra interface, might be nullptr @@ -213,11 +207,6 @@ protected: */ ICNNNetwork* actual = nullptr; IE_SUPPRESS_DEPRECATED_END - - /** - * @brief A pointer to output data - */ - DataPtr output; }; } // namespace InferenceEngine diff --git a/inference-engine/include/cpp/ie_memory_state.hpp b/inference-engine/include/cpp/ie_memory_state.hpp index 88cec51177e..5baecc2de5c 100644 --- a/inference-engine/include/cpp/ie_memory_state.hpp +++ b/inference-engine/include/cpp/ie_memory_state.hpp @@ -17,29 +17,31 @@ namespace InferenceEngine { -IE_SUPPRESS_DEPRECATED_START -class IVariableState; -IE_SUPPRESS_DEPRECATED_END +namespace details { +class SharedObjectLoader; +} + +class IVariableStateInternal; /** * @brief C++ exception based error reporting wrapper of API class IVariableState */ class INFERENCE_ENGINE_API_CLASS(VariableState) { - IE_SUPPRESS_DEPRECATED_START - std::shared_ptr actual = nullptr; - IE_SUPPRESS_DEPRECATED_END - details::SharedObjectLoader::Ptr plugin = nullptr; + std::shared_ptr _impl = nullptr; + details::SharedObjectLoader::Ptr _so = nullptr; + + /** + * @brief Constructs VariableState from the initialized std::shared_ptr 
+ * @param impl Initialized shared pointer + * @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed. + */ + explicit VariableState(const std::shared_ptr& impl, + const details::SharedObjectLoader::Ptr& so = {}); + + friend class InferRequest; + friend class ExecutableNetwork; public: - IE_SUPPRESS_DEPRECATED_START - /** - * @brief constructs VariableState from the initialized std::shared_ptr - * @param pState Initialized shared pointer - * @param plg Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed. - */ - explicit VariableState(std::shared_ptr pState, details::SharedObjectLoader::Ptr plg = {}); - IE_SUPPRESS_DEPRECATED_END - /** * @copybrief IVariableState::Reset * diff --git a/inference-engine/include/ie_common.h b/inference-engine/include/ie_common.h index efae34f4d3f..f0fef03671c 100644 --- a/inference-engine/include/ie_common.h +++ b/inference-engine/include/ie_common.h @@ -473,39 +473,6 @@ struct NullStream { default: IE_ASSERT(!"Unreachable"); \ } -/** - * @private - */ -#define CALL_STATUS_FNC(function, ...) \ - if (!actual) IE_THROW() << "Wrapper used was not initialized."; \ - ResponseDesc resp; \ - auto res = actual->function(__VA_ARGS__, &resp); \ - if (res != OK) IE_EXCEPTION_SWITCH(res, ExceptionType, \ - InferenceEngine::details::ThrowNow{} \ - <<= std::stringstream{} << IE_LOCATION << resp.msg) - -/** - * @private - */ -#define CALL_STATUS_FNC_NO_ARGS(function) \ - if (!actual) IE_THROW() << "Wrapper used in the CALL_STATUS_FNC_NO_ARGS was not initialized."; \ - ResponseDesc resp; \ - auto res = actual->function(&resp); \ - if (res != OK) IE_EXCEPTION_SWITCH(res, ExceptionType, \ - InferenceEngine::details::ThrowNow{} \ - <<= std::stringstream{} << IE_LOCATION) - -/** - * @private - */ -#define CALL_FNC_NO_ARGS(function) \ - if (!actual) IE_THROW() << "Wrapper used in the CALL_FNC_NO_ARGS was not initialized."; \ - ResponseDesc resp; \ - auto result = actual->function(&resp); \ - if (resp.msg[0] != '\0') { \ - IE_THROW() << resp.msg \ - } \ - return result; } // namespace details } // namespace InferenceEngine #if defined(_WIN32) diff --git a/inference-engine/src/cldnn_engine/cldnn_common_utils.h b/inference-engine/src/cldnn_engine/cldnn_common_utils.h index cdf6b9d285b..c374a71a465 100644 --- a/inference-engine/src/cldnn_engine/cldnn_common_utils.h +++ b/inference-engine/src/cldnn_engine/cldnn_common_utils.h @@ -5,7 +5,6 @@ #pragma once #include -#include #include #include "ngraph/type/element_type.hpp" diff --git a/inference-engine/src/cldnn_engine/cldnn_config.cpp b/inference-engine/src/cldnn_engine/cldnn_config.cpp index c25ef88d122..99df5fb8d77 100644 --- a/inference-engine/src/cldnn_engine/cldnn_config.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_config.cpp @@ -6,7 +6,6 @@ #include #include "cldnn_config.h" -#include "cpp_interfaces/exception2status.hpp" #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "ie_api.h" #include "file_utils.h" diff --git a/inference-engine/src/gna_plugin/gna_executable_network.hpp b/inference-engine/src/gna_plugin/gna_executable_network.hpp index e929ad7ad79..9da0c7070a9 100644 --- a/inference-engine/src/gna_plugin/gna_executable_network.hpp +++ b/inference-engine/src/gna_plugin/gna_executable_network.hpp @@ -61,8 +61,6 @@ class GNAExecutableNetwork : public InferenceEngine::ExecutableNetworkInternal { INFERENCE_ENGINE_DEPRECATED("Use 
InferRequest::QueryState instead") std::vector QueryState() override { IE_SUPPRESS_DEPRECATED_START - auto pluginStates = plg->QueryState(); - std::vector state(pluginStates.begin(), pluginStates.end()); return plg->QueryState(); IE_SUPPRESS_DEPRECATED_END } diff --git a/inference-engine/src/gna_plugin/gna_model_serial.cpp b/inference-engine/src/gna_plugin/gna_model_serial.cpp index 6cc23248a14..fdb99d7f273 100644 --- a/inference-engine/src/gna_plugin/gna_model_serial.cpp +++ b/inference-engine/src/gna_plugin/gna_model_serial.cpp @@ -308,12 +308,12 @@ void GNAModelSerial::Import(void *basePointer, readBits(segmentSz, is); uint32_t nameSize = 0; readNBits<32>(nameSize, is); - std::string inName("", nameSize); + std::string inName(nameSize, '\0'); readNBytes(&inName[0], nameSize, is); float scale_factor = 1.0f; readBits(scale_factor, is); if (pstates) { - (*pstates)[i] = std::make_tuple( pSegment, segmentSz, inName, scale_factor); + (*pstates)[i] = std::make_tuple( pSegment, segmentSz, inName.substr(0, nameSize - 1), scale_factor); } } } @@ -617,12 +617,12 @@ void GNAModelSerial::Import(void *basePointer, readBits(segmentSz, is); uint32_t nameSize = 0; readNBits<32>(nameSize, is); - std::string inName("", nameSize); + std::string inName(nameSize, '\0'); readNBytes(&inName[0], nameSize, is); float scale_factor = 1.0f; readBits(scale_factor, is); if (pstates) { - (*pstates)[i] = std::make_tuple( pSegment, segmentSz, inName, scale_factor ); + (*pstates)[i] = std::make_tuple( pSegment, segmentSz, inName.substr(0, nameSize - 1), scale_factor ); } } } diff --git a/inference-engine/src/gna_plugin/gna_model_serial.hpp b/inference-engine/src/gna_plugin/gna_model_serial.hpp index a0c8e08fed5..d756a23f9fc 100644 --- a/inference-engine/src/gna_plugin/gna_model_serial.hpp +++ b/inference-engine/src/gna_plugin/gna_model_serial.hpp @@ -8,7 +8,6 @@ #include #include #include -#include #include "descriptions/gna_input_desc.hpp" #include "descriptions/gna_output_desc.hpp" @@ -158,7 +157,6 @@ private: /** * save gna graph to an outpus stream - * @param ptr_nnet * @param basePtr * @param gnaGraphSize * @param os diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 18b84ec3690..0559dbf24b8 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_type.cpp b/inference-engine/src/gna_plugin/layers/gna_layer_type.cpp index 5ae65a741aa..a333d47c48a 100644 --- a/inference-engine/src/gna_plugin/layers/gna_layer_type.cpp +++ b/inference-engine/src/gna_plugin/layers/gna_layer_type.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include "gna_layer_type.hpp" #include "gna_layer_info.hpp" diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_type.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_type.hpp index 9634c570d30..9f74291fc8b 100644 --- a/inference-engine/src/gna_plugin/layers/gna_layer_type.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_layer_type.hpp @@ -7,7 +7,6 @@ #include #include -#include #include #include "backend/dnn_types.h" diff --git a/inference-engine/src/inference_engine/compilation_context.cpp b/inference-engine/src/inference_engine/compilation_context.cpp index bcbf8627ba0..2cd9a13375a 100644 --- a/inference-engine/src/inference_engine/compilation_context.cpp +++ 
b/inference-engine/src/inference_engine/compilation_context.cpp @@ -13,7 +13,6 @@ #include #include "ie_itt.hpp" -#include "cpp_interfaces/exception2status.hpp" #include "transformations/serialize.hpp" #include "cpp/ie_cnn_network.h" #include "details/ie_exception.hpp" diff --git a/inference-engine/src/plugin_api/cpp_interfaces/exception2status.hpp b/inference-engine/src/inference_engine/cpp/exception2status.hpp similarity index 71% rename from inference-engine/src/plugin_api/cpp_interfaces/exception2status.hpp rename to inference-engine/src/inference_engine/cpp/exception2status.hpp index 8c9e41e2a54..529f77078c1 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/exception2status.hpp +++ b/inference-engine/src/inference_engine/cpp/exception2status.hpp @@ -63,4 +63,21 @@ namespace InferenceEngine { CATCH_IE_EXCEPTION(InferNotStarted) \ CATCH_IE_EXCEPTION(NetworkNotRead) \ CATCH_IE_EXCEPTION(InferCancelled) + +#define CALL_STATUS_FNC(function, ...) \ + if (!actual) IE_THROW() << "Wrapper used was not initialized."; \ + ResponseDesc resp; \ + auto res = actual->function(__VA_ARGS__, &resp); \ + if (res != OK) IE_EXCEPTION_SWITCH(res, ExceptionType, \ + InferenceEngine::details::ThrowNow{} \ + <<= std::stringstream{} << IE_LOCATION << resp.msg) + +#define CALL_STATUS_FNC_NO_ARGS(function) \ + if (!actual) IE_THROW() << "Wrapper used in the CALL_STATUS_FNC_NO_ARGS was not initialized."; \ + ResponseDesc resp; \ + auto res = actual->function(&resp); \ + if (res != OK) IE_EXCEPTION_SWITCH(res, ExceptionType, \ + InferenceEngine::details::ThrowNow{} \ + <<= std::stringstream{} << IE_LOCATION) + } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/cpp/ie_cnn_network.cpp b/inference-engine/src/inference_engine/cpp/ie_cnn_network.cpp index 13f09261a9f..614be0ffa7e 100644 --- a/inference-engine/src/inference_engine/cpp/ie_cnn_network.cpp +++ b/inference-engine/src/inference_engine/cpp/ie_cnn_network.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ie_icnn_network.hpp" #include "cpp/ie_cnn_network.h" +#include "exception2status.hpp" #include "cnn_network_ngraph_impl.hpp" #include "ie_itt.hpp" @@ -11,7 +11,7 @@ namespace InferenceEngine { CNNNetwork::CNNNetwork() : - network(), actual(), output() { + network(), actual() { } CNNNetwork::CNNNetwork(std::shared_ptr network) @@ -123,4 +123,10 @@ void CNNNetwork::serialize(const std::string& xmlPath, const std::string& binPat CALL_STATUS_FNC(serialize, xmlPath, binPath); } +std::string CNNNetwork::getOVNameForTensor(const std::string& orig_name) const { + std::string ov_name; + CALL_STATUS_FNC(getOVNameForTensor, ov_name, orig_name); + return ov_name; +} + } // namespace InferenceEngine diff --git a/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp b/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp index be2871da6a8..d282f4c88c5 100644 --- a/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp +++ b/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cpp/ie_executable_network.hpp" #include "ie_common.h" + +#include "cpp/ie_executable_network.hpp" +#include "ie_executable_network_base.hpp" #include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp" -#include "cpp_interfaces/exception2status.hpp" -#include "cpp_interfaces/base/ie_executable_network_base.hpp" namespace InferenceEngine { -#define EXEC_NET_CALL_STATEMENT(...) 
\ +#define EXEC_NET_CALL_STATEMENT(...) \ if (_impl == nullptr) IE_THROW() << "ExecutableNetwork was not initialized."; \ try { \ __VA_ARGS__; \ @@ -58,7 +58,7 @@ std::vector ExecutableNetwork::QueryState() { std::vector controller; EXEC_NET_CALL_STATEMENT( for (auto&& state : _impl->QueryState()) { - controller.emplace_back(std::make_shared(state), _so); + controller.emplace_back(VariableState(state, _so)); }); return controller; } diff --git a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_executable_network_base.hpp b/inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp similarity index 95% rename from inference-engine/src/plugin_api/cpp_interfaces/base/ie_executable_network_base.hpp rename to inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp index fbbf77ebe76..cb3edc1e23e 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_executable_network_base.hpp +++ b/inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp @@ -16,11 +16,11 @@ #include #include -#include #include #include -#include "cpp_interfaces/exception2status.hpp" -#include "cpp_interfaces/base/ie_infer_async_request_base.hpp" +#include "cpp/exception2status.hpp" +#include "ie_variable_state_base.hpp" +#include "ie_infer_async_request_base.hpp" namespace InferenceEngine { diff --git a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_infer_async_request_base.hpp b/inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp similarity index 98% rename from inference-engine/src/plugin_api/cpp_interfaces/base/ie_infer_async_request_base.hpp rename to inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp index 5775c912b08..f0ddf0c69e5 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_infer_async_request_base.hpp +++ b/inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp @@ -8,9 +8,9 @@ #include #include -#include "cpp_interfaces/exception2status.hpp" +#include "cpp/exception2status.hpp" #include "cpp_interfaces/plugin_itt.hpp" -#include +#include "ie_variable_state_base.hpp" #include #include "ie_iinfer_request.hpp" #include "ie_preprocess.hpp" diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp b/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp index 54f8f533eab..a3052ecda78 100644 --- a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp +++ b/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp @@ -6,11 +6,12 @@ #include #include -#include "cpp/ie_infer_request.hpp" -#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" -#include "cpp_interfaces/base/ie_infer_async_request_base.hpp" #include "ie_remote_context.hpp" +#include "cpp/ie_infer_request.hpp" +#include "ie_infer_async_request_base.hpp" +#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" + namespace InferenceEngine { #define CATCH_IE_EXCEPTION(ExceptionType) catch (const InferenceEngine::ExceptionType& e) {throw e;} @@ -30,8 +31,8 @@ namespace InferenceEngine { CATCH_IE_EXCEPTION(NetworkNotRead) \ CATCH_IE_EXCEPTION(InferCancelled) -#define INFER_REQ_CALL_STATEMENT(...) \ - if (_impl == nullptr) IE_THROW() << "Inference Requst is not initialized"; \ +#define INFER_REQ_CALL_STATEMENT(...) 
\ + if (_impl == nullptr) IE_THROW() << "Inference Request is not initialized"; \ try { \ __VA_ARGS__ \ } CATCH_IE_EXCEPTIONS catch (const std::exception& ex) { \ @@ -197,7 +198,7 @@ std::vector InferRequest::QueryState() { std::vector controller; INFER_REQ_CALL_STATEMENT( for (auto&& state : _impl->QueryState()) { - controller.emplace_back(std::make_shared(state), _so); + controller.emplace_back(VariableState(state, _so)); } ) return controller; diff --git a/inference-engine/src/inference_engine/cpp/ie_variable_state.cpp b/inference-engine/src/inference_engine/cpp/ie_variable_state.cpp index 434d91eb259..b50a03beb96 100644 --- a/inference-engine/src/inference_engine/cpp/ie_variable_state.cpp +++ b/inference-engine/src/inference_engine/cpp/ie_variable_state.cpp @@ -2,19 +2,31 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ie_imemory_state.hpp" #include "cpp/ie_memory_state.hpp" +#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp" +#include "exception2status.hpp" + +#define VARIABLE_CALL_STATEMENT(...) \ + if (_impl == nullptr) IE_THROW() << "VariableState was not initialized."; \ + try { \ + __VA_ARGS__; \ + } CATCH_IE_EXCEPTIONS catch (const std::exception& ex) { \ + IE_THROW() << ex.what(); \ + } catch (...) { \ + IE_THROW(Unexpected); \ + } namespace InferenceEngine { -IE_SUPPRESS_DEPRECATED_START - -VariableState::VariableState(IVariableState::Ptr pState, details::SharedObjectLoader::Ptr plg) : actual(pState), plugin(plg) { - if (actual == nullptr) { - IE_THROW() << "VariableState wrapper was not initialized."; +VariableState::VariableState(const std::shared_ptr& impl, + const details::SharedObjectLoader::Ptr& so) : _impl(impl), _so(so) { + if (impl == nullptr) { + IE_THROW(NotAllocated) << "VariableState wrapper was not initialized."; } } +IE_SUPPRESS_DEPRECATED_START + Blob::CPtr VariableState::GetLastState() const { return GetState(); } @@ -22,23 +34,19 @@ Blob::CPtr VariableState::GetLastState() const { IE_SUPPRESS_DEPRECATED_END void VariableState::Reset() { - CALL_STATUS_FNC_NO_ARGS(Reset); + VARIABLE_CALL_STATEMENT(_impl->Reset()); } std::string VariableState::GetName() const { - char name[256]; - CALL_STATUS_FNC(GetName, name, sizeof(name)); - return name; + VARIABLE_CALL_STATEMENT(return _impl->GetName()); } Blob::CPtr VariableState::GetState() const { - Blob::CPtr stateBlob; - CALL_STATUS_FNC(GetState, stateBlob); - return stateBlob; + VARIABLE_CALL_STATEMENT(return _impl->GetState()); } void VariableState::SetState(Blob::Ptr state) { - CALL_STATUS_FNC(SetState, state); + VARIABLE_CALL_STATEMENT(_impl->SetState(state)); } } // namespace InferenceEngine \ No newline at end of file diff --git a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_variable_state_base.hpp b/inference-engine/src/inference_engine/cpp/ie_variable_state_base.hpp similarity index 97% rename from inference-engine/src/plugin_api/cpp_interfaces/base/ie_variable_state_base.hpp rename to inference-engine/src/inference_engine/cpp/ie_variable_state_base.hpp index a992bb66a45..cd48f7b58d8 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/base/ie_variable_state_base.hpp +++ b/inference-engine/src/inference_engine/cpp/ie_variable_state_base.hpp @@ -6,7 +6,7 @@ #include -#include "cpp_interfaces/exception2status.hpp" +#include "cpp/exception2status.hpp" #include "cpp_interfaces/impl/ie_variable_state_internal.hpp" #include "ie_imemory_state.hpp" diff --git a/inference-engine/src/inference_engine/ie_common.cpp b/inference-engine/src/inference_engine/ie_common.cpp index 
e35fdb7ab9c..c10c7a6c7bc 100644 --- a/inference-engine/src/inference_engine/ie_common.cpp +++ b/inference-engine/src/inference_engine/ie_common.cpp @@ -17,7 +17,6 @@ #include #include -#include namespace ExecGraphInfoSerialization { // diff --git a/inference-engine/src/inference_engine/ie_core.cpp b/inference-engine/src/inference_engine/ie_core.cpp index 94047c8562f..1bc038ff071 100644 --- a/inference-engine/src/inference_engine/ie_core.cpp +++ b/inference-engine/src/inference_engine/ie_core.cpp @@ -16,7 +16,6 @@ #include #include -#include #include "compilation_context.hpp" #include "ie_plugin_cpp.hpp" #include "ie_plugin_config.hpp" diff --git a/inference-engine/src/inference_engine/ie_plugin_cpp.hpp b/inference-engine/src/inference_engine/ie_plugin_cpp.hpp index 48c0e05cef5..d87b16765d6 100644 --- a/inference-engine/src/inference_engine/ie_plugin_cpp.hpp +++ b/inference-engine/src/inference_engine/ie_plugin_cpp.hpp @@ -17,21 +17,21 @@ #include "cpp/ie_executable_network.hpp" #include "cpp/ie_cnn_network.h" #include "ie_plugin_ptr.hpp" -#include "cpp_interfaces/exception2status.hpp" +#include "cpp/exception2status.hpp" #if defined __GNUC__ # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wreturn-type" #endif -#define CALL_STATEMENT(...) \ - if (!actual) IE_THROW() << "Wrapper used in the CALL_STATEMENT was not initialized."; \ - try { \ - __VA_ARGS__; \ - } CATCH_IE_EXCEPTIONS catch (const std::exception& ex) { \ - IE_THROW() << ex.what(); \ - } catch (...) { \ - IE_THROW(Unexpected); \ +#define PLUGIN_CALL_STATEMENT(...) \ + if (!actual) IE_THROW() << "Wrapper used in the PLUGIN_CALL_STATEMENT was not initialized."; \ + try { \ + __VA_ARGS__; \ + } CATCH_IE_EXCEPTIONS catch (const std::exception& ex) { \ + IE_THROW() << ex.what(); \ + } catch (...) 
{ \ + IE_THROW(Unexpected); \ } namespace InferenceEngine { @@ -61,71 +61,71 @@ public: } void SetName(const std::string & deviceName) { - CALL_STATEMENT(actual->SetName(deviceName)); + PLUGIN_CALL_STATEMENT(actual->SetName(deviceName)); } void SetCore(ICore* core) { - CALL_STATEMENT(actual->SetCore(core)); + PLUGIN_CALL_STATEMENT(actual->SetCore(core)); } const Version GetVersion() const { - CALL_STATEMENT(return actual->GetVersion()); + PLUGIN_CALL_STATEMENT(return actual->GetVersion()); } void AddExtension(InferenceEngine::IExtensionPtr extension) { - CALL_STATEMENT(actual->AddExtension(extension)); + PLUGIN_CALL_STATEMENT(actual->AddExtension(extension)); } void SetConfig(const std::map& config) { - CALL_STATEMENT(actual->SetConfig(config)); + PLUGIN_CALL_STATEMENT(actual->SetConfig(config)); } ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::map& config) { - CALL_STATEMENT(return ExecutableNetwork(actual->LoadNetwork(network, config), actual)); + PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->LoadNetwork(network, config), actual)); } ExecutableNetwork LoadNetwork(const CNNNetwork& network, RemoteContext::Ptr context, const std::map& config) { - CALL_STATEMENT(return ExecutableNetwork(actual->LoadNetwork(network, config, context), actual)); + PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->LoadNetwork(network, config, context), actual)); } QueryNetworkResult QueryNetwork(const CNNNetwork& network, const std::map& config) const { QueryNetworkResult res; - CALL_STATEMENT(res = actual->QueryNetwork(network, config)); + PLUGIN_CALL_STATEMENT(res = actual->QueryNetwork(network, config)); if (res.rc != OK) IE_THROW() << res.resp.msg; return res; } ExecutableNetwork ImportNetwork(const std::string& modelFileName, const std::map& config) { - CALL_STATEMENT(return ExecutableNetwork(actual->ImportNetwork(modelFileName, config), actual)); + PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->ImportNetwork(modelFileName, config), actual)); } ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::map& config) { - CALL_STATEMENT(return ExecutableNetwork(actual->ImportNetwork(networkModel, config), actual)); + PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->ImportNetwork(networkModel, config), actual)); } ExecutableNetwork ImportNetwork(std::istream& networkModel, const RemoteContext::Ptr& context, const std::map& config) { - CALL_STATEMENT(return ExecutableNetwork(actual->ImportNetwork(networkModel, context, config), actual)); + PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->ImportNetwork(networkModel, context, config), actual)); } Parameter GetMetric(const std::string& name, const std::map& options) const { - CALL_STATEMENT(return actual->GetMetric(name, options)); + PLUGIN_CALL_STATEMENT(return actual->GetMetric(name, options)); } RemoteContext::Ptr CreateContext(const ParamMap& params) { - CALL_STATEMENT(return actual->CreateContext(params)); + PLUGIN_CALL_STATEMENT(return actual->CreateContext(params)); } RemoteContext::Ptr GetDefaultContext(const ParamMap& params) { - CALL_STATEMENT(return actual->GetDefaultContext(params)); + PLUGIN_CALL_STATEMENT(return actual->GetDefaultContext(params)); } Parameter GetConfig(const std::string& name, const std::map& options) const { - CALL_STATEMENT(return actual->GetConfig(name, options)); + PLUGIN_CALL_STATEMENT(return actual->GetConfig(name, options)); } /** @@ -145,7 +145,7 @@ public: }; } // namespace InferenceEngine -#undef CALL_STATEMENT +#undef PLUGIN_CALL_STATEMENT #if defined 
__GNUC__ # pragma GCC diagnostic pop diff --git a/inference-engine/src/legacy_api/include/legacy/graph_transformer.h b/inference-engine/src/legacy_api/include/legacy/graph_transformer.h index b8b66b804ca..26b45faf30c 100644 --- a/inference-engine/src/legacy_api/include/legacy/graph_transformer.h +++ b/inference-engine/src/legacy_api/include/legacy/graph_transformer.h @@ -10,7 +10,6 @@ #pragma once #include -#include #include #include #include diff --git a/inference-engine/src/legacy_api/src/network_serializer_v7.hpp b/inference-engine/src/legacy_api/src/network_serializer_v7.hpp index a97f5c8575b..788f757ed86 100644 --- a/inference-engine/src/legacy_api/src/network_serializer_v7.hpp +++ b/inference-engine/src/legacy_api/src/network_serializer_v7.hpp @@ -4,7 +4,6 @@ #pragma once -#include #include #include diff --git a/inference-engine/src/multi_device/multi_device_exec_network.cpp b/inference-engine/src/multi_device/multi_device_exec_network.cpp index 2edf0c89234..9e42cbe0809 100644 --- a/inference-engine/src/multi_device/multi_device_exec_network.cpp +++ b/inference-engine/src/multi_device/multi_device_exec_network.cpp @@ -13,7 +13,6 @@ #include "ie_metric_helpers.hpp" -#include #include #include #include "multi_device_exec_network.hpp" diff --git a/inference-engine/src/multi_device/multi_device_infer_request.cpp b/inference-engine/src/multi_device/multi_device_infer_request.cpp index 3cf4f2a7f1f..bebf27ec662 100644 --- a/inference-engine/src/multi_device/multi_device_infer_request.cpp +++ b/inference-engine/src/multi_device/multi_device_infer_request.cpp @@ -6,7 +6,6 @@ #include "multi_device_infer_request.hpp" #include -#include #include #include diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp index 93e71701a9f..d209b2061a7 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp @@ -9,7 +9,6 @@ #include #include -#include "cpp_interfaces/base/ie_infer_async_request_base.hpp" #include "cpp_interfaces/impl/ie_executable_network_internal.hpp" #include "cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp" #include "threading/ie_cpu_streams_executor.hpp" diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp index 11ca5ce2e74..5d98772a65a 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_plugin_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_plugin_internal.hpp index b1ac89bb61d..2621c73a340 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_plugin_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/impl/ie_plugin_internal.hpp @@ -16,7 +16,6 @@ #include #include -#include "cpp_interfaces/base/ie_executable_network_base.hpp" #include "cpp_interfaces/impl/ie_executable_network_internal.hpp" #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" #include 
"cpp_interfaces/plugin_itt.hpp" diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp index b4f7feb6129..bd43eab73d0 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iinfer_request_internal.hpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp index e34d55b02a2..894605d3d79 100644 --- a/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp +++ b/inference-engine/src/plugin_api/cpp_interfaces/interface/ie_iplugin_internal.hpp @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/inference-engine/src/readers/ir_reader/ie_ir_reader.hpp b/inference-engine/src/readers/ir_reader/ie_ir_reader.hpp index 1a2bd2ad2ca..6e1c092bc79 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_reader.hpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_reader.hpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/inference-engine/src/readers/ir_reader_v7/parsers.h b/inference-engine/src/readers/ir_reader_v7/parsers.h index 0493c39a295..b0775364475 100644 --- a/inference-engine/src/readers/ir_reader_v7/parsers.h +++ b/inference-engine/src/readers/ir_reader_v7/parsers.h @@ -4,8 +4,6 @@ #pragma once -#include - #include namespace pugi { diff --git a/inference-engine/src/vpu/common/src/parsed_config_base.cpp b/inference-engine/src/vpu/common/src/parsed_config_base.cpp index 93990e9aab8..876232bd15e 100644 --- a/inference-engine/src/vpu/common/src/parsed_config_base.cpp +++ b/inference-engine/src/vpu/common/src/parsed_config_base.cpp @@ -12,7 +12,6 @@ #include #include -#include #include namespace vpu { diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/blob_reader.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/blob_reader.hpp index fc4a4bca4b6..37f2ec63dfc 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/blob_reader.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/blob_reader.hpp @@ -8,7 +8,6 @@ #include #include -#include #include #include diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp index 892e6f3c3c5..c0881b9b0c6 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp @@ -15,7 +15,6 @@ #include #include -#include #include #include diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp index 4b6b85c49d5..c57a7bd84cc 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp @@ -9,8 +9,6 @@ #include #include -#include - #include #include #include diff --git a/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp b/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp index 2b5d0ab31a1..5e8779a7312 100644 --- 
a/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/parsed_config.cpp @@ -13,7 +13,6 @@ #include #include -#include #include #include diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_config.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_config.cpp index 6258a0c8818..e37aa804809 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_config.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_config.cpp @@ -9,8 +9,6 @@ #include #include -#include - #include #include diff --git a/inference-engine/tests/functional/inference_engine/async_infer_request_test.cpp b/inference-engine/tests/functional/inference_engine/async_infer_request_test.cpp index 359cbbfeda9..8e8a23176af 100644 --- a/inference-engine/tests/functional/inference_engine/async_infer_request_test.cpp +++ b/inference-engine/tests/functional/inference_engine/async_infer_request_test.cpp @@ -5,7 +5,6 @@ #include #include -#include using namespace ::testing; using namespace std; diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp index 76dc7c4cf86..a5a98eb5fab 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/core_integration.hpp @@ -22,7 +22,6 @@ #include #include #include -#include #ifdef ENABLE_UNICODE_PATH_SUPPORT #include diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request.hpp index ac0998d53b9..7adc5edbd4c 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request.hpp @@ -18,7 +18,6 @@ #include "multi-device/multi_device_config.hpp" #include #include -#include #include #include #include "common_test_utils/common_utils.hpp" diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_input.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_input.hpp index e0d77199996..30f1cfd95b0 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_input.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_input.hpp @@ -15,7 +15,6 @@ #include "ngraph_functions/builders.hpp" #include "multi-device/multi_device_config.hpp" #include -#include #include #include "common_test_utils/common_utils.hpp" #include "functional_test_utils/plugin_cache.hpp" diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_output.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_output.hpp index 60a56f1adeb..272a5cbdf90 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_output.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/infer_request_output.hpp @@ -15,7 +15,6 @@ #include "ngraph_functions/builders.hpp" #include "multi-device/multi_device_config.hpp" #include -#include #include #include "common_test_utils/common_utils.hpp" #include "functional_test_utils/plugin_cache.hpp" diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/test_assertions.hpp 
b/inference-engine/tests/ie_test_utils/common_test_utils/test_assertions.hpp index be0eb055f1c..8fbfab0b7a7 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/test_assertions.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/test_assertions.hpp @@ -7,10 +7,10 @@ #include #include +#include #include #include #include -#include inline bool strContains(const std::string & str, const std::string & substr) { return str.find(substr) != std::string::npos; diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp index 24541acdb90..32cd3485bd0 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp @@ -11,7 +11,6 @@ #include #include "ie_input_info.hpp" -#include "ie_icnn_network.hpp" #include #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinfer_request_internal.hpp" diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp index d1e903e18b7..885b5e9b1d8 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.cpp @@ -8,7 +8,6 @@ #include #include "mock_plugin.hpp" -#include #include "description_buffer.hpp" using namespace std; diff --git a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp index 8b8a0beba5a..33b8feadcc5 100644 --- a/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp +++ b/inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_engine/mock_plugin.hpp @@ -8,7 +8,6 @@ #include #include -#include class MockPlugin : public InferenceEngine::InferencePluginInternal { InferenceEngine::IInferencePlugin * _target = nullptr; diff --git a/inference-engine/tests/functional/inference_engine/exception_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/exception_test.cpp similarity index 98% rename from inference-engine/tests/functional/inference_engine/exception_test.cpp rename to inference-engine/tests/unit/inference_engine/cpp_interfaces/exception_test.cpp index 36a869be600..be1aa1c10ff 100644 --- a/inference-engine/tests/functional/inference_engine/exception_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/exception_test.cpp @@ -4,7 +4,7 @@ #include -#include +#include using namespace InferenceEngine; diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_executable_network_base_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_executable_network_base_test.cpp deleted file mode 100644 index 85031f6d316..00000000000 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_executable_network_base_test.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include - -#include 
"unit_test_utils/mocks/cpp_interfaces/impl/mock_executable_thread_safe_default.hpp" -#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinfer_request_internal.hpp" - -using namespace ::testing; -using namespace std; -using namespace InferenceEngine; -using namespace InferenceEngine::details; - -IE_SUPPRESS_DEPRECATED_START - -class ExecutableNetworkThreadSafeTests : public ::testing::Test { -protected: - shared_ptr mockExeNetwork; - shared_ptr exeNetwork; - shared_ptr mockInferRequestInternal; - ResponseDesc dsc; - StatusCode sts; - - virtual void TearDown() { - EXPECT_TRUE(Mock::VerifyAndClearExpectations(mockInferRequestInternal.get())); - EXPECT_TRUE(Mock::VerifyAndClearExpectations(mockExeNetwork.get())); - } - - virtual void SetUp() { - mockExeNetwork = make_shared(); - exeNetwork = std::make_shared(mockExeNetwork); - InputsDataMap networkInputs; - OutputsDataMap networkOutputs; - mockInferRequestInternal = make_shared(networkInputs, networkOutputs); - } -}; - -TEST_F(ExecutableNetworkThreadSafeTests, createInferRequestCallsThreadSafeImplAndSetNetworkIO) { - IInferRequest::Ptr req; - EXPECT_CALL(*mockExeNetwork.get(), CreateInferRequestImpl(_, _)).WillOnce(Return(mockInferRequestInternal)); - EXPECT_NO_THROW(exeNetwork->CreateInferRequest(req, &dsc)); - auto threadSafeReq = dynamic_pointer_cast(req); - ASSERT_NE(threadSafeReq, nullptr); -} - -TEST_F(ExecutableNetworkThreadSafeTests, returnErrorIfInferThrowsException) { - IInferRequest::Ptr req; - EXPECT_CALL(*mockExeNetwork.get(), CreateInferRequestImpl(_, _)).WillOnce(Return(mockInferRequestInternal)); - EXPECT_NO_THROW(exeNetwork->CreateInferRequest(req, &dsc)); - EXPECT_CALL(*mockInferRequestInternal.get(), checkBlobs()).WillOnce(Throw(std::runtime_error(""))); - EXPECT_NO_THROW(sts = req->Infer(&dsc)); - ASSERT_EQ(StatusCode::GENERAL_ERROR, sts) << dsc.msg; -} - -TEST_F(ExecutableNetworkThreadSafeTests, returnErrorIfStartAsyncThrowsException) { - IInferRequest::Ptr req; - EXPECT_CALL(*mockExeNetwork.get(), CreateInferRequestImpl(_, _)).WillOnce(Return(mockInferRequestInternal)); - EXPECT_NO_THROW(exeNetwork->CreateInferRequest(req, &dsc)); - EXPECT_CALL(*mockInferRequestInternal.get(), InferImpl()).WillOnce(Throw(std::runtime_error(""))); - EXPECT_NO_THROW(sts = req->StartAsync(&dsc)); - ASSERT_TRUE(StatusCode::OK == sts) << dsc.msg; - EXPECT_NO_THROW(sts = req->Wait(InferRequest::WaitMode::RESULT_READY, &dsc)); - ASSERT_EQ(StatusCode::GENERAL_ERROR, sts) << dsc.msg; -} diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_base_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_base_test.cpp index f849cb0753a..a848d66fda6 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_base_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_base_test.cpp @@ -9,8 +9,7 @@ #include #include #include -#include -#include +#include #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp" #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_thread_safe_default_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_thread_safe_default_test.cpp index 50a0bbae9e0..520da77a9bb 100644 --- 
a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_thread_safe_default_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_infer_async_request_thread_safe_default_test.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include "unit_test_utils/mocks/cpp_interfaces/mock_task_executor.hpp" diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_memory_state_internal_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_memory_state_internal_test.cpp index ef2d75587d4..c37408606d2 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_memory_state_internal_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_memory_state_internal_test.cpp @@ -6,8 +6,8 @@ #include #include -#include -#include +#include +#include #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp" #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" diff --git a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp index fcc0f8b5ed9..a7dbba29a06 100644 --- a/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp +++ b/inference-engine/tests/unit/inference_engine/cpp_interfaces/ie_plugin_test.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include "unit_test_utils/mocks/mock_not_empty_icnn_network.hpp" diff --git a/inference-engine/tests/unit/inference_engine/ie_exception_test.cpp b/inference-engine/tests/unit/inference_engine/ie_exception_test.cpp index 5a08a4f81a0..6ea39e9b4e8 100644 --- a/inference-engine/tests/unit/inference_engine/ie_exception_test.cpp +++ b/inference-engine/tests/unit/inference_engine/ie_exception_test.cpp @@ -7,9 +7,7 @@ #include #include "ie_common.h" -#include "cpp_interfaces/exception2status.hpp" -// TODO: cover and
from // tests/unit/inference_engine/exception_test.cpp TEST(ExceptionTests, CanThrowUsingMacro) { diff --git a/inference-engine/tests/unit/inference_engine/ie_executable_network_test.cpp b/inference-engine/tests/unit/inference_engine/ie_executable_network_test.cpp index 5feb57ea9ea..e205be6cd92 100644 --- a/inference-engine/tests/unit/inference_engine/ie_executable_network_test.cpp +++ b/inference-engine/tests/unit/inference_engine/ie_executable_network_test.cpp @@ -8,6 +8,7 @@ #include #include "cpp/ie_executable_network.hpp" +#include "cpp/ie_executable_network_base.hpp" #include "ie_plugin_cpp.hpp" #include "unit_test_utils/mocks/mock_iexecutable_network.hpp" diff --git a/inference-engine/tests_deprecated/behavior/shared_tests/plugin_tests/behavior_test_plugin.h b/inference-engine/tests_deprecated/behavior/shared_tests/plugin_tests/behavior_test_plugin.h index cc94c94f9c8..c3c76daf654 100644 --- a/inference-engine/tests_deprecated/behavior/shared_tests/plugin_tests/behavior_test_plugin.h +++ b/inference-engine/tests_deprecated/behavior/shared_tests/plugin_tests/behavior_test_plugin.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/inference-engine/tests_deprecated/functional/ie_tests/include/object_detection_matcher.hpp b/inference-engine/tests_deprecated/functional/ie_tests/include/object_detection_matcher.hpp index 7975b6faf16..e862e024c93 100644 --- a/inference-engine/tests_deprecated/functional/ie_tests/include/object_detection_matcher.hpp +++ b/inference-engine/tests_deprecated/functional/ie_tests/include/object_detection_matcher.hpp @@ -9,7 +9,6 @@ #include #include "base_matcher.hpp" #include -#include namespace Regression { namespace Matchers { diff --git a/inference-engine/tests_deprecated/helpers/tests_common.hpp b/inference-engine/tests_deprecated/helpers/tests_common.hpp index 1671339ffa3..20667e1e2f1 100644 --- a/inference-engine/tests_deprecated/helpers/tests_common.hpp +++ b/inference-engine/tests_deprecated/helpers/tests_common.hpp @@ -18,7 +18,6 @@ #include #include #include -#include #include "test_model_repo.hpp" #include "test_model_path.hpp" @@ -166,14 +165,6 @@ public: # error Unsupported architecture #endif -inline InferenceEngine::InputInfo::Ptr getFirstInput(InferenceEngine::ICNNNetwork *pNet) -{ - InferenceEngine::InputsDataMap inputs; - pNet->getInputsInfo(inputs); - //ASSERT_GT(inputs.size(), 0); - return inputs.begin()->second; -} - /** * @brief Splits the RGB channels to either I16 Blob or float blob. 
* diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp index cbff4680a99..16074bf4ba6 100644 --- a/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp +++ b/inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp index a6a3b737cde..9c57834cbd1 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp @@ -20,10 +20,6 @@ public: } }; -struct TestExecutableNetworkBase : public InferenceEngine::ExecutableNetworkBase { - using InferenceEngine::ExecutableNetworkBase::_impl; -}; - static MKLDNNPlugin::MKLDNNGraph& getGraph(InferenceEngine::IExecutableNetworkInternal::Ptr execNetwork) { return static_cast(execNetwork.get())->getGraph(); } From c52117a09faef14f36db3b869decb1f8cdd52abe Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 30 Apr 2021 08:57:03 +0300 Subject: [PATCH 18/73] Fixed compilation with ninja + clang (#5469) --- .../src/gna_plugin/frontend/scale_factor_calc.hpp | 4 ++-- inference-engine/src/gna_plugin/gna_graph_compiler.cpp | 1 + inference-engine/src/gna_plugin/gna_graph_compiler.hpp | 2 +- inference-engine/src/gna_plugin/gna_plugin.cpp | 1 + .../functional/plugin/gna/pass_tests/fq_activation.cpp | 2 +- .../pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp | 9 +++------ inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt | 2 -- 7 files changed, 9 insertions(+), 12 deletions(-) diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp index 2c9e7ecfa76..1a4dabd2db6 100644 --- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp +++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp @@ -858,8 +858,8 @@ class ScaleFactorPerLayer { auto minScaleFactor = quantParamsFirst->_dst_quant.GetScale(); for (auto it = inputLayers.begin(); it != inputLayers.end(); ++it) { auto quantParams = InferenceEngine::getInjectedData(*it); - if (quantParams->_dst_quant.GetScale() < minScaleFactor && - !fp32eq(quantParams->_dst_quant.GetScale(), 1.0f) || + if ((quantParams->_dst_quant.GetScale() < minScaleFactor && + !fp32eq(quantParams->_dst_quant.GetScale(), 1.0f)) || fp32eq(minScaleFactor, 1.0f)) { minScaleFactor = quantParams->_dst_quant.GetScale(); sourceLayerIt = it; diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 4dee09ea498..76e2f81940e 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -43,6 +43,7 @@ using namespace GNAPluginNS; #define CREATE(name) [](GNAGraphCompiler *p, CNNLayerPtr l) {p->name(l);} +const GNALimitations::Cnn2D::Validator GNAGraphCompiler::cnn2dValidator; void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr gnaMemPtr) { this->gnamem = std::move(gnaMemPtr); diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.hpp b/inference-engine/src/gna_plugin/gna_graph_compiler.hpp index 27246b443aa..e3e5f265084 100644 --- 
a/inference-engine/src/gna_plugin/gna_graph_compiler.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.hpp @@ -52,7 +52,7 @@ private: static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&); std::vector static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols); - const GNALimitations::Cnn2D::Validator cnn2dValidator; + static const GNALimitations::Cnn2D::Validator cnn2dValidator; public: GNAPluginNS::backend::DnnComponents dnnComponents; diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 0559dbf24b8..df0a71fc1ce 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -1519,6 +1519,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i return gnaOperation.Type == Gna2OperationTypeConvolution ? kDnnNonInterleavedOrientation : kDnnInterleavedOrientation; }; + (void)getOrientation; #else auto getOrientation = [](intel_nnet_layer_t & layer) { return layer.nLayerKind == INTEL_CONVOLUTIONAL ? diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp index c996d06205b..ff0495f756e 100644 --- a/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp @@ -59,7 +59,7 @@ public: return result.str(); } - InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const { + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override { return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution); } diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp index 947d27a9b33..f625609b023 100644 --- a/inference-engine/tests/functional/plugin/gna/pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp @@ -158,10 +158,9 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface< } protected: - InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const { + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override { InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc()); blob->allocate(); - auto precision = info.getPrecision(); auto* rawBlobDataPtr = blob->buffer().as(); std::vector values = CommonTestUtils::generate_float_numbers(blob->size(), -0.2f, 0.2f); @@ -250,10 +249,9 @@ class RemovePermutationsWithTwoConvTest : public testing::WithParamInterfaceallocate(); - auto precision = info.getPrecision(); auto* rawBlobDataPtr = blob->buffer().as(); std::vector values = CommonTestUtils::generate_float_numbers(blob->size(), 0.0f, 0.5f); @@ -338,10 +336,9 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterfaceallocate(); - auto precision = info.getPrecision(); auto* rawBlobDataPtr = blob->buffer().as(); std::vector values = CommonTestUtils::generate_float_numbers(blob->size(), -0.2f, 0.2f); diff --git a/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt 
b/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt index f0e88da5c02..37fd33e136b 100644 --- a/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt +++ b/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt @@ -35,8 +35,6 @@ endif() if(NOT WIN32) target_compile_options(${TARGET_NAME} PRIVATE - -MMD - -MP -Wformat -Wformat-security -Wall) From cec1e4ae1c76376f5126a5858674aac4f3bbd84b Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Fri, 30 Apr 2021 08:53:32 +0200 Subject: [PATCH 19/73] Remove new line symbol (#5459) --- docs/ops/arithmetic/Sqrt_1.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ops/arithmetic/Sqrt_1.md b/docs/ops/arithmetic/Sqrt_1.md index ade93d94b57..9f2d1f665d7 100644 --- a/docs/ops/arithmetic/Sqrt_1.md +++ b/docs/ops/arithmetic/Sqrt_1.md @@ -12,8 +12,8 @@ o_{i} = \sqrt{a_{i}} \f] -If the input value is negative, then the result is undefined.\ -For integer element type the result is rounded (half up) to the nearest integer value. +* If the input value is negative, then the result is undefined. +* For integer element type the result is rounded (half up) to the nearest integer value. **Attributes**: *Sqrt* operation has no attributes. From ff9e67e73253c89c7dc77c85e7e5fa60cc49042f Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Fri, 30 Apr 2021 10:37:27 +0300 Subject: [PATCH 20/73] Skip MVN operation on constant branches (#5460) * Skip MVN operation on constant branches * Added test --- .../src/legacy_api/src/graph_transformer.cpp | 1 + .../util_const_infer_test.cpp | 33 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/inference-engine/src/legacy_api/src/graph_transformer.cpp b/inference-engine/src/legacy_api/src/graph_transformer.cpp index 1545e5699d2..4505a51230c 100644 --- a/inference-engine/src/legacy_api/src/graph_transformer.cpp +++ b/inference-engine/src/legacy_api/src/graph_transformer.cpp @@ -221,6 +221,7 @@ static std::vector skipConstInfer = { "Squeeze", "TensorIterator", "LSTMSequence", + "MVN" }; const std::map ConstTransformer::getConstLayers(const std::vector& sortedLayers) { diff --git a/inference-engine/tests_deprecated/unit/inference_engine_tests/util_const_infer_test.cpp b/inference-engine/tests_deprecated/unit/inference_engine_tests/util_const_infer_test.cpp index 006a6c7cff5..49cb14b98e0 100644 --- a/inference-engine/tests_deprecated/unit/inference_engine_tests/util_const_infer_test.cpp +++ b/inference-engine/tests_deprecated/unit/inference_engine_tests/util_const_infer_test.cpp @@ -703,6 +703,39 @@ TEST_F(AdvancedShapeInferTests, canFullTrimConstToReshape) { ASSERT_EQ(layer1->insData[0].lock(), getData("data1")); } +TEST_F(AdvancedShapeInferTests, canFullTrimConstToMVN) { + // + // I2-d2 + // \ + // I1-d1-Reshape-d3-L2-d4 + // + net = netBuilder + .data("data1", IE::TensorDesc(IE::Precision::FP32, IE::SizeVector{3, 1, 1}, IE::Layout::CHW)) + .data("data2", IE::TensorDesc(IE::Precision::FP32, IE::SizeVector{3}, IE::Layout::C)) + .data("data3", IE::TensorDesc(IE::Precision::FP32, IE::SizeVector{1, 1, 1}, IE::Layout::CHW)) + .data("data4", IE::TensorDesc(IE::Precision::FP32, IE::SizeVector{1, 1, 1}, IE::Layout::CHW)) + .layer(IE::LayerParams{"input1", "Const", IE::Precision::I32}) + .layer(IE::LayerParams{"input2", "Const", IE::Precision::FP32}) + .layer(IE::LayerParams{"layer1", "MVN", IE::Precision::FP32}) + .layer(IE::LayerParams{"layer2", "dummy", IE::Precision::FP32}) + .linkToData("input1", "data1") + .linkToData("input2", "data2") + .linkDataTo("data1", "layer1") + 
.linkDataTo("data2", "layer1") + .linkToData("layer1", "data3") + .linkDataTo("data3", "layer2") + .linkToData("layer2", "data4") + .addInput("data1") + .addInput("data2") + .finalize(); + + IE::BlobMap refBlobs = initConstLayers({"input1", "input2"}); + auto layer1 = getLayer("layer1"); + + IE::ConstTransformer transformator(net.get()); + ASSERT_NO_THROW(transformator.fullTrim()); +} + TEST_F(AdvancedShapeInferTests, canReshape) { // // I2-d2-Shape From 8b1b900591f2207b7ea7ff0328f410fa0e4aa5a9 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 30 Apr 2021 10:47:29 +0300 Subject: [PATCH 21/73] CVS-44774: Fixed preprocessing for template plugin (#4118) * Fixed preprocessing for template plugin * Added more tests instances * Split common transformation to smaller ones which can be used by plugins * Moved preprocessing transformation to Plugin API * Added PreprocessConversionTest tests * Disabled tests on GPU: CVS-51764 * Disabled some tests on VPU and TEMPLATE * Support for input layout conversions in TEMPLATE plugin * Improvements in Template Plugin * Fixed compilation * Fixes * Disables some tests * Fixed compilation on Windows * Fixed docs --- docs/nGraph_DG/nGraphTransformation.md | 24 +-- docs/template_plugin/src/CMakeLists.txt | 2 +- docs/template_plugin/src/template_config.cpp | 2 +- .../src/template_executable_network.cpp | 26 ++- .../src/template_executable_network.hpp | 8 +- .../src/template_infer_request.cpp | 25 ++- docs/template_plugin/src/template_plugin.cpp | 20 +- .../preprocessing/mean_image_or_value.cpp | 48 +++++ .../preprocessing/mean_image_or_value.hpp | 33 ++++ .../preprocessing/preprocessing.cpp | 101 ++++++++++ .../preprocessing/preprocessing.hpp | 35 ++++ .../preprocessing/std_scale.cpp | 48 +++++ .../preprocessing/std_scale.hpp | 33 ++++ .../template_function_transformation.cpp | 0 .../template_function_transformation.hpp | 0 .../template_pattern_transformation.cpp | 4 +- .../template_pattern_transformation.hpp | 2 +- .../behavior/preprocessing.cpp | 4 +- .../behavior/set_preprocess.cpp | 23 +++ .../tests/functional/skip_tests_config.cpp | 6 +- .../transformations/preprocessing.cpp | 183 ++++++++++++++++++ .../src/plugin_api/precision_utils.h | 3 + .../include/behavior/set_preprocess.hpp | 11 +- 23 files changed, 589 insertions(+), 52 deletions(-) create mode 100644 docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.cpp create mode 100644 docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.hpp create mode 100644 docs/template_plugin/src/transformations/preprocessing/preprocessing.cpp create mode 100644 docs/template_plugin/src/transformations/preprocessing/preprocessing.hpp create mode 100644 docs/template_plugin/src/transformations/preprocessing/std_scale.cpp create mode 100644 docs/template_plugin/src/transformations/preprocessing/std_scale.hpp rename docs/template_plugin/src/{ => transformations}/template_function_transformation.cpp (100%) rename docs/template_plugin/src/{ => transformations}/template_function_transformation.hpp (100%) rename docs/template_plugin/src/{ => transformations}/template_pattern_transformation.cpp (97%) rename docs/template_plugin/src/{ => transformations}/template_pattern_transformation.hpp (93%) create mode 100644 docs/template_plugin/tests/functional/transformations/preprocessing.cpp diff --git a/docs/nGraph_DG/nGraphTransformation.md b/docs/nGraph_DG/nGraphTransformation.md index 96e89ec7691..5e88ccdf12c 100644 --- a/docs/nGraph_DG/nGraphTransformation.md +++ 
b/docs/nGraph_DG/nGraphTransformation.md @@ -61,9 +61,9 @@ nGraph has three main transformation types: Template for FunctionPass transformation class -@snippet src/template_function_transformation.hpp function_pass:template_transformation_hpp +@snippet src/transformations/template_function_transformation.hpp function_pass:template_transformation_hpp -@snippet src/template_function_transformation.cpp function_pass:template_transformation_cpp +@snippet src/transformations/template_function_transformation.cpp function_pass:template_transformation_cpp Using `ngraph::FunctionPass`, you need to override the `run_on_function` method where you will write the transformation code. Return value is `true` if the original function has changed during transformation (new operation was added, or operations replacement was made, or node attributes were changed); otherwise, it is `false`. @@ -75,9 +75,9 @@ Also `ngraph::FunctionPass` based transformations can be executed via `pass::Man `ngraph::pass::MatcherPass` is used for pattern-based transformations. Template for MatcherPass transformation class -@snippet src/template_pattern_transformation.hpp graph_rewrite:template_transformation_hpp +@snippet src/transformations/template_pattern_transformation.hpp graph_rewrite:template_transformation_hpp -@snippet src/template_pattern_transformation.cpp graph_rewrite:template_transformation_cpp +@snippet src/transformations/template_pattern_transformation.cpp graph_rewrite:template_transformation_cpp To use `ngraph::pass::MatcherPass`, you need to complete these steps: 1. Create a pattern @@ -113,7 +113,7 @@ That means that matcher passes registered in `pass::GraphRewrite` will be applie The example below shows how single MatcherPass can fuse sequence of operations using the `register_new_node` method. -@snippet src/template_pattern_transformation.cpp matcher_pass:relu_fusion +@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:relu_fusion > **NOTE**: If you register multiple nodes, please add them in topological order. We do not topologically sort these nodes as it is a time-consuming operation. @@ -128,11 +128,11 @@ register_matcher(m, callback); ### Execute MatcherPass MatcherPass has multiple ways to be executed: * Run on a single node - it can be useful if you want to run MatcherPass inside another transformation. -@snippet src/template_pattern_transformation.cpp matcher_pass:run_on_node +@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:run_on_node * Run on `ngraph::Function` using GraphRewrite - this approach gives ability to run MatcherPass on whole `ngraph::Function`. Moreover, multiple MatcherPass transformation can be registered in a single GraphRewite to be executed in a single graph traversal. -@snippet src/template_pattern_transformation.cpp matcher_pass:graph_rewrite +@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:graph_rewrite * Run on `ngraph::Function` using `pass::Manager` - this approach helps you to register MatcherPass for execution on `ngraph::Function` as another transformation types. -@snippet src/template_pattern_transformation.cpp matcher_pass:manager +@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager ### ngraph::pass::GraphRewrite @@ -140,7 +140,7 @@ MatcherPass has multiple ways to be executed: GraphRewrite pass serves for running multiple matcher passes on `ngraph::Function` in a single graph traversal. 
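As an illustration of the point above, a minimal sketch of grouping matcher passes under one GraphRewrite so they are applied during a single traversal of the function; `f` is assumed to be a std::shared_ptr<ngraph::Function>, and the second matcher name is a hypothetical placeholder rather than a pass from this repository:

ngraph::pass::Manager manager;
auto rewrite = manager.register_pass<ngraph::pass::GraphRewrite>();
// Every node is visited once; each registered matcher gets a chance to fire on it
rewrite->add_matcher<ngraph::pass::ReluReluFusionMatcher>();
rewrite->add_matcher<MyOtherMatcherPass>();  // hypothetical second MatcherPass
manager.run_passes(f);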
Example: -@snippet src/template_pattern_transformation.cpp matcher_pass:graph_rewrite +@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:graph_rewrite In addition, GraphRewrite handles nodes that were registered by MatcherPasses during their execution. This nodes will be added to the beginning of the sequence with nodes for pattern matching. @@ -352,7 +352,7 @@ Manual constant folding is more preferable than `ngraph::pass::ConstantFolding() Below you can find an example of manual constant folding: -@snippet src/template_pattern_transformation.cpp manual_constant_folding +@snippet src/transformations/template_pattern_transformation.cpp manual_constant_folding ## Common mistakes in transformations @@ -373,11 +373,11 @@ In addition, `ngraph::pass::Manager` has extended debug capabilities (find more The example below shows basic usage of `ngraph::pass::Manager` -@snippet src/template_pattern_transformation.cpp matcher_pass:manager3 +@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager3 Another example shows how multiple matcher passes can be united into single GraphRewrite. -@snippet src/template_pattern_transformation.cpp matcher_pass:manager2 +@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager2 > **Note:** nGraph used to have the `pass::PassConfig` class for transformation pipeline manipulation. This mechanism is now obsolete and the `pass::PassConfig` class will be removed in future release. diff --git a/docs/template_plugin/src/CMakeLists.txt b/docs/template_plugin/src/CMakeLists.txt index 68693126df7..62cfe6641a1 100644 --- a/docs/template_plugin/src/CMakeLists.txt +++ b/docs/template_plugin/src/CMakeLists.txt @@ -5,7 +5,7 @@ # [cmake:plugin] set(TARGET_NAME "templatePlugin") -file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +file(GLOB_RECURSE SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) file(GLOB_RECURSE HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) # adds a shared library with plugin diff --git a/docs/template_plugin/src/template_config.cpp b/docs/template_plugin/src/template_config.cpp index ec75893462b..c29e17512c3 100644 --- a/docs/template_plugin/src/template_config.cpp +++ b/docs/template_plugin/src/template_config.cpp @@ -28,7 +28,7 @@ Configuration::Configuration(const ConfigMap& config, const Configuration & defa } else if (CONFIG_KEY(DEVICE_ID) == key) { deviceId = std::stoi(value); if (deviceId > 0) { - IE_THROW() << "Device ID " << deviceId << " is not supported"; + IE_THROW(NotImplemented) << "Device ID " << deviceId << " is not supported"; } } else if (CONFIG_KEY(PERF_COUNT) == key) { perfCount = (CONFIG_VALUE(YES) == value); diff --git a/docs/template_plugin/src/template_executable_network.cpp b/docs/template_plugin/src/template_executable_network.cpp index 91c1917e5ac..e599dceb434 100644 --- a/docs/template_plugin/src/template_executable_network.cpp +++ b/docs/template_plugin/src/template_executable_network.cpp @@ -16,6 +16,8 @@ using namespace TemplatePlugin; // ! 
[executable_network:ctor_cnnnetwork] TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr& function, + const InferenceEngine::InputsDataMap& inputInfoMap, + const InferenceEngine::OutputsDataMap& outputsInfoMap, const Configuration& cfg, const Plugin::Ptr& plugin) : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation @@ -25,14 +27,14 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptrGetCore()->ReadNetwork(xmlString, std::move(dataBlob)); @@ -72,27 +76,31 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream & model, SetPointerToPlugin(_plugin->shared_from_this()); try { - CompileNetwork(cnnnetwork.getFunction()); + CompileNetwork(cnnnetwork.getFunction(), inputInfoMap, outputInfoMap); InitExecutor(); // creates thread-based executor using for async requests } catch (const InferenceEngine::Exception&) { throw; } catch (const std::exception & e) { - IE_THROW() << "Standard exception from compilation library: " << e.what(); + IE_THROW(Unexpected) << "Standard exception from compilation library: " << e.what(); } catch (...) { - IE_THROW() << "Generic exception is thrown"; + IE_THROW(Unexpected) << "Generic exception is thrown"; } } // ! [executable_network:ctor_import_stream] // ! [executable_network:map_graph] // forward declaration -std::shared_ptr TransformNetwork(const std::shared_ptr& function); +std::shared_ptr TransformNetwork(const std::shared_ptr& function, + const InferenceEngine::InputsDataMap & inputInfoMap, + const InferenceEngine::OutputsDataMap& outputsInfoMap); -void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr& function) { +void TemplatePlugin::ExecutableNetwork::CompileNetwork(const std::shared_ptr& function, + const InferenceEngine::InputsDataMap & inputInfoMap, + const InferenceEngine::OutputsDataMap& outputsInfoMap) { // TODO: perform actual graph compilation / mapping to backend graph representation / kernels // apply plugins transformations - _function = TransformNetwork(function); + _function = TransformNetwork(function, inputInfoMap, outputsInfoMap); // Generate backend specific blob mappings. For example Inference Engine uses not ngraph::Result nodes friendly name // as inference request output names but the name of the layer before. 
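The input and output info maps threaded through the constructor above carry the InferenceEngine::PreProcessInfo that an application configures before calling LoadNetwork. A minimal client-side sketch of that setup, with a placeholder model path and example mean/scale values that are not taken from this patch:

InferenceEngine::Core core;
InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");   // placeholder path
auto inputInfo = network.getInputsInfo().begin()->second;
auto& preProcess = inputInfo->getPreProcess();
preProcess.init(3);                                     // three channels
preProcess.setVariant(InferenceEngine::MEAN_VALUE);     // per-channel mean values
for (size_t c = 0; c < preProcess.getNumberOfChannels(); ++c) {
    preProcess[c]->meanValue = 127.5f;                   // example values only
    preProcess[c]->stdScale  = 2.0f;
}
auto execNetwork = core.LoadNetwork(network, "TEMPLATE");
// The plugin receives this information via networkInputs / networkOutputs
// and lowers it to graph nodes inside TransformNetwork.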
diff --git a/docs/template_plugin/src/template_executable_network.hpp b/docs/template_plugin/src/template_executable_network.hpp index a7332e9bab1..23f781a2efd 100644 --- a/docs/template_plugin/src/template_executable_network.hpp +++ b/docs/template_plugin/src/template_executable_network.hpp @@ -25,6 +25,8 @@ class Plugin; class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault { public: ExecutableNetwork(const std::shared_ptr& function, + const InferenceEngine::InputsDataMap& inputInfoMap, + const InferenceEngine::OutputsDataMap& outputsInfoMap, const Configuration& cfg, const std::shared_ptr& plugin); @@ -38,7 +40,7 @@ public: void ExportImpl(std::ostream& model) override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs) override; + InferenceEngine::OutputsDataMap networkOutputs) override; InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; InferenceEngine::Parameter GetMetric(const std::string &name) const override; InferenceEngine::Parameter GetConfig(const std::string &name) const override; @@ -46,7 +48,9 @@ public: private: friend class TemplateInferRequest; - void CompileNetwork(const std::shared_ptr& function); + void CompileNetwork(const std::shared_ptr& function, + const InferenceEngine::InputsDataMap& inputInfoMap, + const InferenceEngine::OutputsDataMap& outputsInfoMap); void InitExecutor(); std::atomic _requestId = {0}; diff --git a/docs/template_plugin/src/template_infer_request.cpp b/docs/template_plugin/src/template_infer_request.cpp index 9218c795520..49678c7960a 100644 --- a/docs/template_plugin/src/template_infer_request.cpp +++ b/docs/template_plugin/src/template_infer_request.cpp @@ -61,7 +61,8 @@ template static void AllocateImpl(const BlobDataMap& userDataMap, BlobMap& userBlobMap, BlobMap& deviceBlobMap, - GetNetworkPrecisionF&& GetNetworkPrecision) { + GetNetworkPrecisionF&& GetNetworkPrecision, + bool isInputBlob = true) { for (auto&& userData : userDataMap) { auto& dims = userData.second->getTensorDesc().getDims(); const auto devicePrecision = Precision::FP32; @@ -77,7 +78,7 @@ static void AllocateImpl(const BlobDataMap& userDataMap, case Precision::FP32 : { userBlob = InferenceEngine::make_shared_blob({userPrecision, dims, userLayout}); } break; - default: IE_THROW() << "Template Plugin: Unsupported Input/Output Precision"; + default: IE_THROW(NotImplemented) << "Template Plugin: Unsupported Input/Output Precision"; } userBlob->allocate(); userBlobMap[userData.first] = userBlob; @@ -92,12 +93,16 @@ static void AllocateImpl(const BlobDataMap& userDataMap, deviceBlob = InferenceEngine::make_shared_blob({devicePrecision, dims, deviceLayout}); } } break; - default: IE_THROW() << "Template Plugin: Unsupported network Input/Output Presision"; + default: IE_THROW(NotImplemented) << "Template Plugin: Unsupported network Input/Output Presision"; } - // preprocessing converts user input blob to desired device input blob automatically - // NOTE: this is not supported for output user blobs yet if (userBlob != deviceBlob) { - deviceBlob->allocate(); + if (isInputBlob) { + // preprocessing converts user input blob to desired device input blob automatically + deviceBlob->allocate(); + } else { + // NOTE: this is not supported for output user blobs yet + IE_THROW(NotImplemented) << "Template Plugin: does not support setPrecision, setLayout for outputs"; + } } deviceBlobMap[userData.first] = deviceBlob; } @@ -111,7 
+116,7 @@ void TemplateInferRequest::allocateBlobs() { auto&& results = _executableNetwork->_function->get_results(); AllocateImpl(_networkOutputs, _outputs, _networkOutputBlobs, [&] (const std::string& blobName) { return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type(); - }); + }, false); } // ! [infer_request:infer_impl] @@ -140,7 +145,7 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) { blobCopy(src, dst); } break; default : { - IE_THROW() << "Unsupported precision conversion from " + IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() <<" to " << dst->getTensorDesc().getPrecision(); } } @@ -152,13 +157,13 @@ static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) { blobCopy(src, dst); } break; default : { - IE_THROW() << "Unsupported precision conversion from " + IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision() <<" to " << dst->getTensorDesc().getPrecision(); } } } break; default : { - IE_THROW() << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision(); + IE_THROW(NotImplemented) << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision(); } } } diff --git a/docs/template_plugin/src/template_plugin.cpp b/docs/template_plugin/src/template_plugin.cpp index 93623b78068..061d2c40d4d 100644 --- a/docs/template_plugin/src/template_plugin.cpp +++ b/docs/template_plugin/src/template_plugin.cpp @@ -22,7 +22,8 @@ #include "template_plugin.hpp" #include "template_executable_network.hpp" #include "template_infer_request.hpp" -#include "template_pattern_transformation.hpp" +#include "transformations/template_pattern_transformation.hpp" +#include "transformations/preprocessing/preprocessing.hpp" using namespace TemplatePlugin; @@ -52,12 +53,17 @@ Plugin::~Plugin() { // ! [plugin:transform_network] -std::shared_ptr TransformNetwork(const std::shared_ptr& function) { +std::shared_ptr TransformNetwork(const std::shared_ptr& function, + const InferenceEngine::InputsDataMap & inputInfoMap, + const InferenceEngine::OutputsDataMap& outputsInfoMap) { // 1. Copy ngraph::Function first to apply some transformations which modify original ngraph::Function auto transformedNetwork = ngraph::clone_function(*function); // 2. Perform common optimizations and device-specific transformations ngraph::pass::Manager passManager; + // Example: register transformation to convert preprocessing information to graph nodes + passManager.register_pass(inputInfoMap); + // TODO: add post-processing based on outputsInfoMap // Example: register CommonOptimizations transformation from transformations library passManager.register_pass(); // Template plugin handles only FP32 networks @@ -81,8 +87,12 @@ InferenceEngine::ExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const const ConfigMap &config) { OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, "Plugin::LoadExeNetworkImpl"); + InferenceEngine::InputsDataMap networkInputs = network.getInputsInfo(); + InferenceEngine::OutputsDataMap networkOutputs = network.getOutputsInfo(); + auto fullConfig = Configuration{ config, _cfg }; - return std::make_shared(network.getFunction(), fullConfig, + return std::make_shared(network.getFunction(), + networkInputs, networkOutputs, fullConfig, std::static_pointer_cast(shared_from_this())); } // ! 
[plugin:load_exe_network_impl] @@ -114,7 +124,7 @@ InferenceEngine::QueryNetworkResult Plugin::QueryNetwork(const InferenceEngine:: } // 2. It is needed to apply all transformations as it is done in LoadExeNetworkImpl - auto transformedFunction = TransformNetwork(function); + auto transformedFunction = TransformNetwork(function, network.getInputsInfo(), network.getOutputsInfo()); // 3. The same input node can be transformed into supported and unsupported backend node // So we need store as supported either unsupported node sets @@ -246,7 +256,7 @@ InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std: using uint = unsigned int; IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, std::make_tuple(uint{1}, uint{1}, uint{1})); } else { - IE_THROW() << "Unsupported device metric: " << name; + IE_THROW(NotFound) << "Unsupported device metric: " << name; } } // ! [plugin:get_metric] diff --git a/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.cpp b/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.cpp new file mode 100644 index 00000000000..7f20813e1ba --- /dev/null +++ b/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "transformations/preprocessing/mean_image_or_value.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::AddMeanSubtract, "AddMeanSubtract", 0); + +ngraph::pass::AddMeanSubtract::AddMeanSubtract(const MeanMap & inputInfoMap) { + // RUN_ON_FUNCTION_SCOPE(AddMeanSubtract); + auto param = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [=] (pattern::Matcher& m) { + auto param = std::dynamic_pointer_cast(m.get_match_root()); + if (!param) { + return false; + } + + auto it = inputInfoMap.find(param->get_friendly_name()); + if (it == inputInfoMap.end()) { + return false; + } + + auto mean_const = it->second; + NGRAPH_CHECK(mean_const->get_element_type() == ngraph::element::f32, + "Mean for ", param->get_friendly_name(), " must have f32 type"); + + auto copy_param = param->clone_with_new_inputs({}); + auto sub = std::make_shared(copy_param, mean_const); + + ngraph::replace_node(param, sub); + sub->set_argument(0, param); + + // Return true as the root node was changed + return true; + }; + + // Register pattern with Parameter operation as a pattern root node + auto m = std::make_shared(param, "AddMeanSubtract"); + // Register Matcher + register_matcher(m, callback); +} diff --git a/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.hpp b/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.hpp new file mode 100644 index 00000000000..906bfdc0aa4 --- /dev/null +++ b/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include + +#include "transformations_visibility.hpp" + +namespace ngraph { +namespace pass { + +class AddMeanSubtract; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief Add `meanValue` or `meanImage` preprocessing to input nodes + */ +class ngraph::pass::AddMeanSubtract : public ngraph::pass::MatcherPass { +public: + using MeanMap = std::map>; + + NGRAPH_RTTI_DECLARATION; + explicit AddMeanSubtract(const 
MeanMap & inputInfoMap); +}; diff --git a/docs/template_plugin/src/transformations/preprocessing/preprocessing.cpp b/docs/template_plugin/src/transformations/preprocessing/preprocessing.cpp new file mode 100644 index 00000000000..a7e6d8bc718 --- /dev/null +++ b/docs/template_plugin/src/transformations/preprocessing/preprocessing.cpp @@ -0,0 +1,101 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "transformations/preprocessing/mean_image_or_value.hpp" +#include "transformations/preprocessing/std_scale.hpp" +#include "transformations/preprocessing/preprocessing.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::AddPreprocessing, "AddPreprocessing", 0); + +ngraph::pass::AddPreprocessing::AddPreprocessing(const InferenceEngine::InputsDataMap & inputInfoMap) + : m_inputInfoMap(inputInfoMap) { } + +bool ngraph::pass::AddPreprocessing::run_on_function(std::shared_ptr f) { + ngraph::pass::AddMeanSubtract::MeanMap meanMap; + ngraph::pass::AddStdScale::ScaleMap scaleMap; + + for (const auto & it : m_inputInfoMap) { + bool has_scales = false, has_mean_values = false, has_mean_image = false; + const InferenceEngine::PreProcessInfo & pInfo = it.second->getPreProcess(); + const auto & inputDims = it.second->getTensorDesc().getDims(); + const size_t cn = pInfo.getNumberOfChannels(); + std::vector meanValues(cn), stdScales(cn); + InferenceEngine::Blob::Ptr meanImage = nullptr; + + for (size_t c = 0; c < cn; ++c) { + if ((stdScales[c] = pInfo[c]->stdScale) != 1.0f) { + has_scales = true; + } + + if ((meanValues[c] = pInfo[c]->meanValue) != 0.0f) { + has_mean_values = true; + } + + if (pInfo[c]->meanData != nullptr) { + has_mean_image = true; + if (c == 0) { + meanImage = pInfo[c]->meanData; + NGRAPH_CHECK(meanImage->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32, + "Only InferenceEngine::Precision::FP32 precision is supported for PreProcessChannel::meanData"); + } else { + NGRAPH_CHECK(meanImage->getTensorDesc() == pInfo[c]->meanData->getTensorDesc(), + "TensorDesc for PreProcessChannel::meanData must be equal"); + } + } + } + + // no preprocessing for current input + if (!has_mean_values && !has_scales && !has_mean_image) { + continue; + } + + NGRAPH_CHECK(!(has_mean_image && has_scales), + "Only PreProcessChannel::meanData or PreProcessChannel::meanValue can be set."); + + if (has_scales) { + ngraph::Shape shape(inputDims.size(), 1); + shape[1] = stdScales.size(); // C + scaleMap[it.first] = ngraph::opset3::Constant::create(ngraph::element::f32, shape, stdScales); + } + + if (has_mean_values) { + ngraph::Shape shape(inputDims.size(), 1); + shape[1] = meanValues.size(); // C + meanMap[it.first] = ngraph::opset3::Constant::create(ngraph::element::f32, shape, meanValues); + } else if (has_mean_image) { + ngraph::Shape shape = { cn }; + auto dims = meanImage->getTensorDesc().getDims(); + std::copy(dims.begin(), dims.end(), std::back_inserter(shape)); + + std::vector meanImageData(ngraph::shape_size(shape)); + for (size_t c = 0, i = 0; c < cn; ++c) { + auto lm = pInfo[c]->meanData->buffer(); + const float *data = lm.as(); + + std::memcpy(&meanImageData[i], data, meanImage->byteSize()); + i += meanImage->size(); + } + + meanMap[it.first] = ngraph::opset3::Constant::create(ngraph::element::f32, + shape, meanImageData); + } + } + + ngraph::pass::Manager manager(get_pass_config()); + auto preproc = manager.register_pass(); + + if (!scaleMap.empty()) { + preproc->add_matcher(scaleMap); + } + if (!meanMap.empty()) { + 
preproc->add_matcher(meanMap); + } + + manager.run_passes(f); + + return false; +} diff --git a/docs/template_plugin/src/transformations/preprocessing/preprocessing.hpp b/docs/template_plugin/src/transformations/preprocessing/preprocessing.hpp new file mode 100644 index 00000000000..3ff95fc95ea --- /dev/null +++ b/docs/template_plugin/src/transformations/preprocessing/preprocessing.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ie_input_info.hpp" + +namespace ngraph { +namespace pass { + +class AddPreprocessing; + +} // namespace pass +} // namespace ngraph + +/** + * @brief Converts the following preprocessing information to ngraph operations: + * - InferenceEngine::PreProcessInfo->PreProcessChannel::meanData -> Subtract + * - InferenceEngine::PreProcessInfo->PreProcessChannel::meanValue -> Subtract + * - InferenceEngine::PreProcessInfo->PreProcessChannel::stdScale -> Multiply + * + * The order of operations is the following: + * (x - mean) * stdScale + */ +class ngraph::pass::AddPreprocessing : public ngraph::pass::FunctionPass { + const InferenceEngine::InputsDataMap & m_inputInfoMap; +public: + NGRAPH_RTTI_DECLARATION; + explicit AddPreprocessing(const InferenceEngine::InputsDataMap & inputInfoMap); + + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/docs/template_plugin/src/transformations/preprocessing/std_scale.cpp b/docs/template_plugin/src/transformations/preprocessing/std_scale.cpp new file mode 100644 index 00000000000..a27e017451b --- /dev/null +++ b/docs/template_plugin/src/transformations/preprocessing/std_scale.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "transformations/preprocessing/std_scale.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::AddStdScale, "AddStdScale", 0); + +ngraph::pass::AddStdScale::AddStdScale(const ScaleMap& inputInfoMap) { + // RUN_ON_FUNCTION_SCOPE(AddStdScale); + auto param = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [=] (pattern::Matcher& m) { + auto param = std::dynamic_pointer_cast(m.get_match_root()); + if (!param) { + return false; + } + + auto it = inputInfoMap.find(param->get_friendly_name()); + if (it == inputInfoMap.end()) { + return false; + } + + auto scale_const = it->second; + NGRAPH_CHECK(scale_const->get_element_type() == ngraph::element::f32, + "Scale for ", param->get_friendly_name(), " must have f32 type"); + + auto copy_param = param->clone_with_new_inputs({}); + auto mul = std::make_shared(copy_param, it->second); + + ngraph::replace_node(param, mul); + mul->set_argument(0, param); + + // Return true as the root node was changed + return true; + }; + + // Register pattern with Parameter operation as a pattern root node + auto m = std::make_shared(param, "AddStdScale"); + // Register Matcher + register_matcher(m, callback); +} diff --git a/docs/template_plugin/src/transformations/preprocessing/std_scale.hpp b/docs/template_plugin/src/transformations/preprocessing/std_scale.hpp new file mode 100644 index 00000000000..edc2838bd46 --- /dev/null +++ b/docs/template_plugin/src/transformations/preprocessing/std_scale.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include + +#include "transformations_visibility.hpp" + +namespace ngraph { +namespace pass 
{ + +class AddStdScale; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief Add `stdScale` preprocessing to input nodes + */ +class ngraph::pass::AddStdScale : public ngraph::pass::MatcherPass { +public: + using ScaleMap = std::map>; + + NGRAPH_RTTI_DECLARATION; + explicit AddStdScale(const ScaleMap& inputInfoMap); +}; diff --git a/docs/template_plugin/src/template_function_transformation.cpp b/docs/template_plugin/src/transformations/template_function_transformation.cpp similarity index 100% rename from docs/template_plugin/src/template_function_transformation.cpp rename to docs/template_plugin/src/transformations/template_function_transformation.cpp diff --git a/docs/template_plugin/src/template_function_transformation.hpp b/docs/template_plugin/src/transformations/template_function_transformation.hpp similarity index 100% rename from docs/template_plugin/src/template_function_transformation.hpp rename to docs/template_plugin/src/transformations/template_function_transformation.hpp diff --git a/docs/template_plugin/src/template_pattern_transformation.cpp b/docs/template_plugin/src/transformations/template_pattern_transformation.cpp similarity index 97% rename from docs/template_plugin/src/template_pattern_transformation.cpp rename to docs/template_plugin/src/transformations/template_pattern_transformation.cpp index 8a7c3bb8d3f..c1a3a92fa15 100644 --- a/docs/template_plugin/src/template_pattern_transformation.cpp +++ b/docs/template_plugin/src/transformations/template_pattern_transformation.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "template_pattern_transformation.hpp" -#include "template_function_transformation.hpp" +#include "transformations/template_pattern_transformation.hpp" +#include "transformations/template_function_transformation.hpp" #include #include diff --git a/docs/template_plugin/src/template_pattern_transformation.hpp b/docs/template_plugin/src/transformations/template_pattern_transformation.hpp similarity index 93% rename from docs/template_plugin/src/template_pattern_transformation.hpp rename to docs/template_plugin/src/transformations/template_pattern_transformation.hpp index 4ea8fadf14d..f2b8d400988 100644 --- a/docs/template_plugin/src/template_pattern_transformation.hpp +++ b/docs/template_plugin/src/transformations/template_pattern_transformation.hpp @@ -16,7 +16,7 @@ class ReluReluFusionMatcher; } // namespace ngraph // ! [graph_rewrite:template_transformation_hpp] -// template_pattern_transformation.hpp +// transformations/template_pattern_transformation.hpp /** * @ingroup ie_transformation_common_api * @brief Add transformation description. 
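Taken together, AddPreprocessing dispatches to the AddMeanSubtract and AddStdScale matcher passes introduced above, which rewrite each affected Parameter into (x - mean) * stdScale. A minimal sketch of the resulting subgraph for a 1x3x224x224 input; shapes and numeric values are illustrative only:

auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32,
                                                        ngraph::Shape{1, 3, 224, 224});
// Per-channel constants are built with shape {1, C, 1, 1}, as in AddPreprocessing
auto mean = ngraph::opset3::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 1, 1},
                                             std::vector<float>{127.5f, 127.5f, 127.5f});
auto scale = ngraph::opset3::Constant::create(ngraph::element::f32, ngraph::Shape{1, 3, 1, 1},
                                              std::vector<float>{2.0f, 2.0f, 2.0f});
auto sub = std::make_shared<ngraph::opset3::Subtract>(data, mean);
auto mul = std::make_shared<ngraph::opset3::Multiply>(sub, scale);
// Former consumers of `data` are re-wired to `mul`, so inference computes (x - mean) * stdScale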
diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing.cpp index 7041ceb5080..84de2307c53 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing.cpp +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing.cpp @@ -19,7 +19,7 @@ const std::vector> configs = { {} }; -INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaSetInput, PreprocessingPrecisionConvertTest, +INSTANTIATE_TEST_CASE_P(smoke_PreprocessingPrecisionConvertTestsViaSetInput, PreprocessingPrecisionConvertTest, ::testing::Combine( ::testing::ValuesIn(inputPrecisions), ::testing::Values(4), // Number of input tensor channels @@ -28,7 +28,7 @@ INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaSetInput, Preproces ::testing::ValuesIn(configs)), PreprocessingPrecisionConvertTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(PreprocessingPrecisionConvertTestsViaGetBlob, PreprocessingPrecisionConvertTest, +INSTANTIATE_TEST_CASE_P(smoke_PreprocessingPrecisionConvertTestsViaGetBlob, PreprocessingPrecisionConvertTest, ::testing::Combine( ::testing::ValuesIn(inputPrecisions), ::testing::Values(4), // Number of input tensor channels (blob_copy only supports 4d and 5d tensors) diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/set_preprocess.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/set_preprocess.cpp index 1dd8b83f2ec..90d6bdcf881 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/set_preprocess.cpp +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/set_preprocess.cpp @@ -19,6 +19,15 @@ const std::vector> configs = { {} }; +const std::vector> multiConfigs = { + {{ InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, + CommonTestUtils::DEVICE_TEMPLATE }} +}; + +const std::vector> heteroConfigs = { + {{ "TARGET_FALLBACK", CommonTestUtils::DEVICE_TEMPLATE }} +}; + INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PreprocessTest, ::testing::Combine( ::testing::ValuesIn(netPrecisions), @@ -26,6 +35,20 @@ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PreprocessTest, ::testing::ValuesIn(configs)), PreprocessTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, PreprocessTest, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_MULTI), + ::testing::ValuesIn(multiConfigs)), + PreprocessTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_Hetero_BehaviorTests, PreprocessTest, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_HETERO), + ::testing::ValuesIn(heteroConfigs)), + PreprocessTest::getTestCaseName); + const std::vector ioPrecisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::U8 diff --git a/docs/template_plugin/tests/functional/skip_tests_config.cpp b/docs/template_plugin/tests/functional/skip_tests_config.cpp index 7b3d7e75b90..a153c0940a0 100644 --- a/docs/template_plugin/tests/functional/skip_tests_config.cpp +++ b/docs/template_plugin/tests/functional/skip_tests_config.cpp @@ -12,10 +12,10 @@ std::vector disabledTestPatterns() { ".*ExclusiveAsyncRequests.*", ".*reusableCPUStreamsExecutor.*", R"(.*SplitLayerTest.*numSplits\=30.*)", - // CVS-44774 - ".*PreprocessTest.*", // CVS-51758 ".*PreprocessConversionTest.*oPRC=U8.*", - 
".*PreprocessConversionTest.*oLT=NHWC.*" + ".*PreprocessConversionTest.*oLT=NHWC.*", + ".*PreprocessingPrecisionConvertTestsViaSetInput.*SetInput.*", + ".*PreprocessingPrecisionConvertTestsViaGetBlob.*GetBlob.*", }; } \ No newline at end of file diff --git a/docs/template_plugin/tests/functional/transformations/preprocessing.cpp b/docs/template_plugin/tests/functional/transformations/preprocessing.cpp new file mode 100644 index 00000000000..b7721e68d48 --- /dev/null +++ b/docs/template_plugin/tests/functional/transformations/preprocessing.cpp @@ -0,0 +1,183 @@ +// // Copyright (C) 2021 Intel Corporation +// // SPDX-License-Identifier: Apache-2.0 +// // + +// #include + +// #include +// #include +// #include + +// #include +// #include +// #include + +// #include +// #include +// #include + +// #include "common_test_utils/ngraph_test_utils.hpp" + + +// using namespace testing; +// using namespace ngraph; + + +// TEST(TransformationTests, Preprocessing_AddStdScale) { +// std::shared_ptr f(nullptr), f_ref(nullptr); + +// const Shape data_shape{1, 3, 14, 14}; +// const Shape scale_shape{3, 1, 1}; +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto relu = std::make_shared(data); +// f = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// auto scales = opset5::Constant::create(element::f32, scale_shape, +// std::vector(shape_size(scale_shape), 2.0f)); +// pass::Manager m; +// m.register_pass(); +// m.register_pass(pass::AddStdScale::ScaleMap{ { data->get_friendly_name(), scales } }); +// m.run_passes(f); +// } +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto scales = opset5::Constant::create(element::f32, scale_shape, +// std::vector(shape_size(scale_shape), 2.0f)); +// auto mul = std::make_shared(data, scales); +// auto relu = std::make_shared(mul); +// f_ref = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// } + +// auto res = compare_functions(f, f_ref); +// ASSERT_TRUE(res.first) << res.second; +// } + +// TEST(TransformationTests, Preprocessing_AddMeanValue) { +// std::shared_ptr f(nullptr), f_ref(nullptr); + +// const Shape data_shape{1, 3, 14, 14}; +// const Shape mean_shape{3, 1, 1}; +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto relu = std::make_shared(data); +// f = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// auto meanValues = opset5::Constant::create(element::f32, mean_shape, +// std::vector(shape_size(mean_shape), 2.0f)); +// pass::Manager m; +// m.register_pass(); +// m.register_pass(pass::AddMeanSubtract::MeanMap{ { data->get_friendly_name(), meanValues } }); +// m.run_passes(f); +// } +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto meanValues = opset5::Constant::create(element::f32, mean_shape, +// std::vector(shape_size(mean_shape), 2.0f)); +// auto sub = std::make_shared(data, meanValues); +// auto relu = std::make_shared(sub); +// f_ref = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// } + +// auto res = compare_functions(f, f_ref); +// ASSERT_TRUE(res.first) << res.second; +// } + +// TEST(TransformationTests, Preprocessing_AddMeanImage) { +// std::shared_ptr f(nullptr), f_ref(nullptr); + +// const Shape data_shape{1, 3, 14, 14}; +// const Shape mean_shape{3, 14, 14}; +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto relu = std::make_shared(data); +// f = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// auto meanValues = opset5::Constant::create(element::f32, mean_shape, +// 
std::vector(shape_size(mean_shape), 2.0f)); +// pass::Manager m; +// m.register_pass(); +// m.register_pass(pass::AddMeanSubtract::MeanMap{ { data->get_friendly_name(), meanValues } }); +// m.run_passes(f); +// } +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto meanValues = opset5::Constant::create(element::f32, mean_shape, +// std::vector(shape_size(mean_shape), 2.0f)); +// auto sub = std::make_shared(data, meanValues); +// auto relu = std::make_shared(sub); +// f_ref = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// } + +// auto res = compare_functions(f, f_ref); +// ASSERT_TRUE(res.first) << res.second; +// } + +// TEST(TransformationTests, Preprocessing_AddMeanImageAndScale) { +// std::shared_ptr f(nullptr), f_ref(nullptr); + +// const Shape data_shape{1, 3, 14, 14}; +// const Shape mean_shape{3, 14, 14}; +// const Shape scale_shape{3, 1, 1}; +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto relu = std::make_shared(data); +// f = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// auto meanValues = opset5::Constant::create(element::f32, mean_shape, +// std::vector(shape_size(mean_shape), 2.0f)); +// auto scaleValues = opset5::Constant::create(element::f32, scale_shape, +// std::vector(shape_size(scale_shape), 2.0f)); +// pass::Manager m; +// m.register_pass(); +// m.register_pass(pass::AddStdScale::ScaleMap{ { data->get_friendly_name(), scaleValues } }); +// m.register_pass(pass::AddMeanSubtract::MeanMap{ { data->get_friendly_name(), meanValues } }); +// m.run_passes(f); +// } +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto meanValues = opset5::Constant::create(element::f32, mean_shape, +// std::vector(shape_size(mean_shape), 2.0f)); +// auto scaleValues = opset5::Constant::create(element::f32, scale_shape, +// std::vector(shape_size(scale_shape), 2.0f)); +// auto sub = std::make_shared(data, meanValues); +// auto mul = std::make_shared(sub, scaleValues); +// auto relu = std::make_shared(mul); +// f_ref = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// } + +// auto res = compare_functions(f, f_ref); +// ASSERT_TRUE(res.first) << res.second; +// } + +// TEST(TransformationTests, Preprocessing_AddMeanValueAndScale) { +// std::shared_ptr f(nullptr), f_ref(nullptr); + +// const Shape data_shape{1, 3, 14, 14}; +// const Shape mean_shape{3, 1, 1}; +// const Shape scale_shape{3, 1, 1}; +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto relu = std::make_shared(data); +// f = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// auto meanValues = opset5::Constant::create(element::f32, mean_shape, +// std::vector(shape_size(mean_shape), 2.0f)); +// auto scaleValues = opset5::Constant::create(element::f32, scale_shape, +// std::vector(shape_size(scale_shape), 2.0f)); +// pass::Manager m; +// m.register_pass(); +// m.register_pass(pass::AddStdScale::ScaleMap{ { data->get_friendly_name(), scaleValues } }); +// m.register_pass(pass::AddMeanSubtract::MeanMap{ { data->get_friendly_name(), meanValues } }); +// m.run_passes(f); +// } +// { +// auto data = std::make_shared(element::f32, data_shape); +// auto meanValues = opset5::Constant::create(element::f32, mean_shape, +// std::vector(shape_size(mean_shape), 2.0f)); +// auto scaleValues = opset5::Constant::create(element::f32, scale_shape, +// std::vector(shape_size(scale_shape), 2.0f)); +// auto sub = std::make_shared(data, meanValues); +// auto mul = std::make_shared(sub, meanValues); +// auto relu = 
std::make_shared(mul); +// f_ref = std::make_shared(NodeVector{relu}, ParameterVector{data}); +// } + +// auto res = compare_functions(f, f_ref); +// ASSERT_TRUE(res.first) << res.second; +// } diff --git a/inference-engine/src/plugin_api/precision_utils.h b/inference-engine/src/plugin_api/precision_utils.h index 96b7ca252dc..8ec4f546338 100644 --- a/inference-engine/src/plugin_api/precision_utils.h +++ b/inference-engine/src/plugin_api/precision_utils.h @@ -28,6 +28,9 @@ namespace InferenceEngine { * @{ * @defgroup ie_dev_api_plugin_api Plugin base classes * @brief A set of base and helper classes to implement a plugin class + * + * @defgroup ie_dev_api_preproc_api Preprocessing API + * @brief A set transformations to convert InferenceEngine::PreProcessInfo to ngraph operations * * @defgroup ie_dev_api_exec_network_api Executable Network base classes * @brief A set of base and helper classes to implement an executable network class diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp index 78a9e72665b..c498d7963e3 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp @@ -124,8 +124,9 @@ TEST_P(PreprocessTest, SetMeanImagePreProcessGetBlob) { auto outMem = outBlob->cbuffer(); const auto* outData = outMem.as(); ASSERT_EQ(inBlob->size(), outBlob->size()); - for (size_t i = 0; i < inBlob->size(); i++) + for (size_t i = 0; i < inBlob->size(); i++) { ASSERT_EQ(inData[i] + inData[i], outData[i]); + } } } @@ -255,8 +256,9 @@ TEST_P(PreprocessTest, SetMeanValuePreProcessGetBlob) { auto outMem = outBlob->cbuffer(); const auto* outData = outMem.as(); ASSERT_EQ(inBlob->size(), outBlob->size()); - for (size_t i = 0; i < inBlob->size(); i++) - ASSERT_EQ(inData[i]+5, outData[i]); + for (size_t i = 0; i < inBlob->size(); i++) { + ASSERT_EQ(inData[i] + 5, outData[i]); + } } } @@ -511,8 +513,9 @@ TEST_P(PreprocessTest, SetScalePreProcessGetBlob) { auto outMem = outBlob->cbuffer(); const auto* outData = outMem.as(); ASSERT_EQ(inBlob->size(), outBlob->size()); - for (size_t i = 0; i < inBlob->size(); i++) + for (size_t i = 0; i < inBlob->size(); i++) { ASSERT_EQ(inData[i]*2, outData[i]); + } } } From 03ca3d1ef712506e52c36218c51c65b61779c39c Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 30 Apr 2021 13:34:33 +0300 Subject: [PATCH 22/73] [CPU] Fixed SoftPlus for large positive values (#4932) --- docs/ops/activation/SoftPlus_4.md | 16 +++++++++++++++- .../src/mkldnn_plugin/mkldnn_graph_optimizer.cpp | 10 +++++----- .../src/mkldnn_plugin/mkldnn_node.cpp | 1 + .../src/mkldnn_plugin/nodes/list_tbl.hpp | 1 - .../mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp | 2 +- .../mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp | 5 +++-- .../nodes/mkldnn_interpolate_node.cpp | 2 +- .../cpu/single_layer_tests/convolution.cpp | 1 + .../cpu/single_layer_tests/group_convolution.cpp | 1 + .../plugin/cpu/test_utils/fusing_test_utils.hpp | 4 ++++ .../shared_tests_instances/skip_tests_config.cpp | 2 ++ .../shared_tests_instances/skip_tests_config.cpp | 2 ++ .../src/single_layer/activation.cpp | 6 ++++++ inference-engine/thirdparty/mkl-dnn | 2 +- .../ngraph/runtime/reference/softplus.hpp | 5 ++++- ngraph/python/tests/test_onnx/test_ops_unary.py | 2 +- ngraph/test/onnx/onnx_import.in.cpp | 4 ++-- 17 files changed, 50 insertions(+), 16 deletions(-) diff --git 
a/docs/ops/activation/SoftPlus_4.md b/docs/ops/activation/SoftPlus_4.md index 8afc94684ac..19714de749b 100644 --- a/docs/ops/activation/SoftPlus_4.md +++ b/docs/ops/activation/SoftPlus_4.md @@ -13,9 +13,23 @@ *SoftPlus* performs element-wise activation function on a given input tensor, based on the following mathematical formula: \f[ -SoftPlus(x) = \ln(1+e^{x}) +SoftPlus(x) = \left\{\begin{array}{r} + x \qquad \mbox{if } x \geq threshold \\ + log(e^{x} + 1.0) \qquad \mbox{if } x < threshold +\end{array}\right. \f] +**Note**: For numerical stability the operation reverts to the linear function when `x > threshold` where `threshold` depends on *T* and +is chosen in such a way that the difference between the linear function and exact calculation is no more than `1e-6`. +The `threshold` can be calculated with the following formula where `alpha` is the number of digits after the decimal point, +`beta` is maximum value of *T* data type: + +\f[ +-log(e^{10^{-\alpha}} - 1.0) < threshold < log(\beta) +\f] + +For example, if *T* is `fp32`, `threshold` should be `20` or if *T* is `fp16`, `threshold` should be `12`. + **Attributes**: *SoftPlus* operation has no attributes. diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 10c808ba2b2..0976b5f1961 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -615,7 +615,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) { (eltwiseNode->getOpType() == Relu || (conv->getCnnLayer()->precision == Precision::FP32 && IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid, - Round}))); + Round, SoftRelu}))); }; for (int i = 0; i < graphNodes.size(); i++) { @@ -694,7 +694,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra IE_THROW() << "Cannot get Eltwise node " << childNode->getName(); if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, - Hsigmoid, Round})) { + Hsigmoid, Round, SoftRelu})) { return true; } else if (IsOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu})) { if (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() != 2) @@ -1053,7 +1053,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) || (eltwiseNode->getOpType() == Prelu) || IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, - Hsigmoid, Round})); + Hsigmoid, Round, SoftRelu})); } return false; @@ -1269,7 +1269,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG (eltwiseNode->getOpType() == Relu || (conv->getCnnLayer()->precision == Precision::FP32 && IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid, - Round}))); + Round, SoftRelu}))); }; for (auto &graphNode : graphNodes) { @@ -1568,7 +1568,7 @@ void MKLDNNGraphOptimizer::FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph) { if (eltwiseNode == nullptr) IE_THROW() << "Cannot get Eltwise node " << node->getName(); return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish, - Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) || + Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt, 
SoftRelu}) || ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) || (eltwiseNode->getOpType() == Prelu)); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 114b6d18c05..d3af44347ad 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -80,6 +80,7 @@ static const InferenceEngine::details::caseless_unordered_map { "Round", Eltwise }, { "ScaleShift", Eltwise }, { "PReLU", Eltwise }, + { "SoftPlus", Eltwise }, { "Norm", Lrn }, { "LRN", Lrn }, { "Pooling", Pooling }, diff --git a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp index da6e70d7eeb..e66af69e08f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp @@ -32,7 +32,6 @@ MKLDNN_EXTENSION_NODE(MathImpl, Selu); MKLDNN_EXTENSION_NODE(MathImpl, Sign); MKLDNN_EXTENSION_NODE(MathImpl, Sin); MKLDNN_EXTENSION_NODE(MathImpl, Sinh); -MKLDNN_EXTENSION_NODE(MathImpl, SoftPlus); MKLDNN_EXTENSION_NODE(MathImpl, Softsign); MKLDNN_EXTENSION_NODE(MathImpl, Tan); MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 68c554ceef0..1738d1798a9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -1114,7 +1114,7 @@ bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { } return eltwiseNode->isSum() || - isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, + isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, SoftRelu, Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 3f7b02b9a4c..fca94bf51d9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -843,7 +843,7 @@ MKLDNNEltwiseNode::initializers = { opType = BoundedRelu; algorithm = mkldnn::algorithm::eltwise_bounded_relu; }}, - {"soft_relu", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { + {"softplus", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { alpha = 0.0f; beta = 0.0f; opType = SoftRelu; @@ -983,7 +983,8 @@ void MKLDNNEltwiseNode::init() { comparator(layerType, "hswish") || comparator(layerType, "mish") || comparator(layerType, "hsigmoid") || - comparator(layerType, "round")) { + comparator(layerType, "round") || + comparator(layerType, "softplus")) { initializers[layerType](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta); } else if (comparator(layerType, "erf")) { eltwiseOp = Erf; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index 8c496991393..b87c33b7320 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -3197,7 +3197,7 @@ bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const { auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode == nullptr) IE_THROW() << "Cannot get eltwise node " << node->getName(); - return isOneOf(eltwiseNode->getOpType(), {Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, + return isOneOf(eltwiseNode->getOpType(), {Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, SoftRelu, Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) || (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2); } diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp index 7b596f40a9a..597b6d053b7 100755 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp @@ -112,6 +112,7 @@ const std::vector fusingParamsSet{ fusingSwish, fusingHSwish, fusingMish, + fusingSoftPlus, // other patterns fusingReluScaleShift, fusingFakeQuantizePerTensorRelu, diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp index 068f074c7ff..b3267e7e199 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp @@ -123,6 +123,7 @@ std::vector fusingParamsSet { fusingSwish, fusingHSwish, fusingMish, + fusingSoftPlus, // other patterns fusingReluScaleShift, fusingFakeQuantizePerTensorRelu, diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp index 9d2de2b2715..b084dacbd16 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp @@ -112,6 +112,10 @@ const auto fusingMish = fusingSpecificParams{std::make_shared(std: {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Mish, {}, {}); }, "Mish"}}), {"Mish"}}; +const auto fusingSoftPlus = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::SoftPlus, {}, {}); + }, "SoftPlus"}}), {"SoftPlus"}}; const auto fusingTanh = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Tanh, {}, {}); diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 01ffef7fe45..631c6a88c4d 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -59,5 +59,7 @@ std::vector disabledTestPatterns() { 
R"(.*ConstantResultSubgraphTest.*inPrc=I16.*)", // TODO: Issue: 54436 R"(.*LSTMSequence.*CompareWithRefs.*mode=PURE_SEQ_RAND_SEQ_LEN_PARAM.*direction=bidirectional_clip=0.7_netPRC=FP32.*)", + // TODO: Issue: 54194 + R"(.*ActivationLayerTest.*SoftPlus.*)", }; } diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp index 8bf55a9e35d..5252cddbd95 100644 --- a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp @@ -37,5 +37,7 @@ std::vector disabledTestPatterns() { R"(.*CTCGreedyDecoderSeqLen.*?\(1.1.1\).*)", // TODO: Issue 51804 ".*PreprocessConversionTest.*oPRC=U8.*", + // TODO: Issue 54163 + R"(.*ActivationLayerTest.*SoftPlus.*)", }; } diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp index 83edd50358b..58a671eae73 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/activation.cpp @@ -101,6 +101,12 @@ InferenceEngine::Blob::Ptr ActivationLayerTest::GenerateInput(const InferenceEng resolution = 32768; break; } + case ngraph::helpers::ActivationTypes::SoftPlus: { + data_start_from = -100; + data_range = 200; + resolution = 32768; + break; + } default: { data_start_from = -10; data_range = 20; diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index 0292c2a2a25..2dd78726213 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 0292c2a2a2525ff86590de3b499ceb61a5e2355f +Subproject commit 2dd787262134c20f91f222bfa776225d2dddbc9a diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/softplus.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/softplus.hpp index d68ecb31c0d..a5c95e4c6f9 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/softplus.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/softplus.hpp @@ -16,9 +16,12 @@ namespace ngraph template void softplus(const T* arg, T* out, size_t count) { + const T threshold = static_cast(-std::log(std::exp(std::pow(10, -6)) - 1)); + for (size_t i = 0; i < count; i++) { - out[i] = std::log(std::exp(arg[i]) + 1.0); + out[i] = (arg[i] < threshold) ? 
static_cast(std::log(std::exp(arg[i]) + 1)) + : arg[i]; } } } // namespace reference diff --git a/ngraph/python/tests/test_onnx/test_ops_unary.py b/ngraph/python/tests/test_onnx/test_ops_unary.py index ca300167b8c..582749264a6 100644 --- a/ngraph/python/tests/test_onnx/test_ops_unary.py +++ b/ngraph/python/tests/test_onnx/test_ops_unary.py @@ -266,7 +266,7 @@ def test_logsoftmax(): def test_softplus(): def softplus(x): - return np.log(np.exp(x) + 1) + return np.where(x < 20, np.log(np.exp(x) + 1), x) np.random.seed(133391) data = np.random.randn(3, 4, 5).astype(np.float32) diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp index a8b0d7c90b4..a5de02359ee 100644 --- a/ngraph/test/onnx/onnx_import.in.cpp +++ b/ngraph/test/onnx/onnx_import.in.cpp @@ -2402,9 +2402,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softplus) 0.6931471824645996094, 1.313261628150939941, 10.0000457763671875, - inf, + 100.0, 0.0, - inf, + 1000.0, 0.0, 0.6931471824645996094, 0.6931471824645996094, From bcb67bfb6a6f439bc9ac41fa41a400c93e47c690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Karzy=C5=84ski?= <4430709+postrational@users.noreply.github.com> Date: Fri, 30 Apr 2021 13:37:34 +0200 Subject: [PATCH 23/73] Add support for ONNX BatchNorm-7 and -9 (#5465) --- .../onnx_import/src/op/batch_norm.cpp | 33 ++++++++++++++++--- .../onnx_import/src/op/batch_norm.hpp | 6 ++++ .../frontend/onnx_import/src/ops_bridge.cpp | 1 + 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/ngraph/frontend/onnx_import/src/op/batch_norm.cpp b/ngraph/frontend/onnx_import/src/op/batch_norm.cpp index c338e527020..81cfb910c74 100644 --- a/ngraph/frontend/onnx_import/src/op/batch_norm.cpp +++ b/ngraph/frontend/onnx_import/src/op/batch_norm.cpp @@ -18,6 +18,7 @@ namespace ngraph { namespace set_1 { + // This version supports ONNX BatchNormalization-1 and BatchNormalization-6 OutputVector batch_norm(const Node& node) { OutputVector inputs{node.get_ng_inputs()}; @@ -27,11 +28,10 @@ namespace ngraph Output mean; Output var; - std::int64_t is_test{node.get_attribute_value("is_test", 1)}; double epsilon{node.get_attribute_value("epsilon", 1e-5)}; - // TODO: Implement learning mode support - // float momentum{node.get_attribute_value("momentum", 0.9f)}; + // Currently only BatchNormalization inference mode is supported by OpenVINO + std::int64_t is_test{node.get_attribute_value("is_test", 1)}; CHECK_VALID_NODE(node, is_test, "only 'is_test' mode is supported."); // optional outputs @@ -55,9 +55,34 @@ namespace ngraph throw ngraph_error( "Cannot create nGraph batch norm with unsupported number of inputs"); } - } // namespace set_1 + namespace set_7 + { + // This version supports ONNX BatchNormalization-7 and BatchNormalization-9 + OutputVector batch_norm(const Node& node) + { + OutputVector inputs{node.get_ng_inputs()}; + auto x = inputs.at(0); + auto scale = inputs.at(1); + auto bias = inputs.at(2); + auto mean = inputs.at(3); + auto var = inputs.at(4); + + double epsilon{node.get_attribute_value("epsilon", 1e-5)}; + // Attribute "spatial" is ignored, as we only support inference mode of + // BatchNormalization + + CHECK_VALID_NODE(node, + node.get_outputs_size() == 1, + "Training mode of BatchNormalization is not supported."); + + return {std::make_shared( + x, scale, bias, mean, var, epsilon)}; + } + + } // namespace set_7 + } // namespace op } // namespace onnx_import diff --git a/ngraph/frontend/onnx_import/src/op/batch_norm.hpp b/ngraph/frontend/onnx_import/src/op/batch_norm.hpp index 
9fa2623d02e..494c76a3c75 100644 --- a/ngraph/frontend/onnx_import/src/op/batch_norm.hpp +++ b/ngraph/frontend/onnx_import/src/op/batch_norm.hpp @@ -19,6 +19,12 @@ namespace ngraph } // namespace set_1 + namespace set_7 + { + OutputVector batch_norm(const Node& node); + + } // namespace set_7 + } // namespace op } // namespace onnx_import diff --git a/ngraph/frontend/onnx_import/src/ops_bridge.cpp b/ngraph/frontend/onnx_import/src/ops_bridge.cpp index 85d27a05aff..0d74e8d79ae 100644 --- a/ngraph/frontend/onnx_import/src/ops_bridge.cpp +++ b/ngraph/frontend/onnx_import/src/ops_bridge.cpp @@ -316,6 +316,7 @@ namespace ngraph REGISTER_OPERATOR("Atanh", 1, atanh); REGISTER_OPERATOR("AveragePool", 1, average_pool); REGISTER_OPERATOR("BatchNormalization", 1, batch_norm); + REGISTER_OPERATOR("BatchNormalization", 7, batch_norm); REGISTER_OPERATOR("BitShift", 1, bitshift); REGISTER_OPERATOR("Cast", 1, cast); REGISTER_OPERATOR("Ceil", 1, ceil); From 22e4566faa7bedf52ed15049155b89978b54db9f Mon Sep 17 00:00:00 2001 From: Elizaveta Lobanova Date: Fri, 30 Apr 2021 15:42:27 +0300 Subject: [PATCH 24/73] [GNA] Remove extra reshape before maxpool. Fix activation and maxpool reordering. (#5404) Fix convolution input transposition for Kaldi models with FakeQuantise layers. Fix floating point error in gnaFuncTests with debug logs. --- .../src/gna_plugin/gna_graph_compiler.cpp | 7 + .../src/gna_plugin/gna_plugin.cpp | 3 + .../gna_plugin/optimizer/gna_pass_manager.cpp | 12 +- .../transformations/remove_extra_reshapes.cpp | 31 ++++ .../transformations/remove_extra_reshapes.hpp | 20 +++ .../gna/pass_tests/fq_maxpool_reordering.cpp | 148 ++++++++++++++++++ .../subgraph_tests/fq_conv_fq_affine.cpp | 3 + .../subgraph/fq_conv_fq_affine.hpp | 1 + .../src/subgraph/fq_conv_fq_affine.cpp | 22 ++- 9 files changed, 238 insertions(+), 9 deletions(-) create mode 100644 inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp create mode 100644 inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.hpp create mode 100644 inference-engine/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 76e2f81940e..541bd142c3e 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -453,6 +453,12 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP size_t num_data_bytes_in = (num_inputs + num_input_padding) * inputs->getPrecision().size(); auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input; + // Skip FakeQuantize and ScaleShift between Convolution and Input + if (LayerInfo(connectedInputLayer).isFakeQuantize()) { + connectedInputLayer = CNNNetPrevLayerSkipCertain(connectedInputLayer, 0, [](CNNLayerPtr l) { + return LayerInfo(l).isScaleShift(); + }); + } // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that if (!dnn->do_rotate_input) { @@ -626,6 +632,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP ptr_weights, ptr_biases); + currentComponent.num_bytes_per_input = inputs->getPrecision().size(); currentComponent.num_bytes_per_output = outputs->getPrecision().size(); if (inputs->getLayout() == Layout::NHWC) { diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp 
b/inference-engine/src/gna_plugin/gna_plugin.cpp index df0a71fc1ce..a1f7e003dc7 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -56,6 +56,8 @@ #include #include +#include "transformations/remove_extra_reshapes.hpp" + #if GNA_LIB_VER == 2 #include @@ -663,6 +665,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); // UnrollTI should be the last transformation in the transformation pipeline manager.register_pass(); diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index 0a7a4a44e02..4c40692d239 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -371,19 +371,21 @@ namespace { void ReorderMaxPoolPass::run() { // detecting following pattern - // conv->relu->maxpooling - // changing it to conv->maxpooling->relu + // conv->activation->maxpooling + // changing it to conv->maxpooling->activation for (auto & l : *pLayers) { auto pool = LayerInfo(l); if (!pool.isMaxPooling()) continue; // don't reorder if pooling is 2D for CNN2D auto pooling = dynamic_cast(l.get()); - if (pooling == nullptr || (is2D(pooling->_kernel) || is2D(pooling->_stride))) continue; + // todo: return the check for stride after it'll be fixed in MO for Kaldi models + if (pooling == nullptr || (is2D(pooling->_kernel))) continue; // checking prev layer type - auto activation = LayerInfo(CNNNetPrevLayer(l)); - if (!activation.isActivation()) continue; + auto actLayer = CNNNetPrevLayer(l); + auto activation = LayerInfo(actLayer); + if (!activation.isActivation() || actLayer->insData.size() > 1) continue; // if activation came from convolution auto convolution = LayerInfo(CNNNetPrevLayer(static_cast(activation))); diff --git a/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp new file mode 100644 index 00000000000..cbb4cb625d0 --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp @@ -0,0 +1,31 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/remove_extra_reshapes.hpp" + +#include +#include + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0); + +RemoveExtraReshapes::RemoveExtraReshapes() { + const auto reshape = ngraph::pattern::wrap_type(); + const auto pooling = ngraph::pattern::wrap_type({reshape}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); + if (reshape_node->get_input_shape(0) != reshape_node->get_output_shape(0)) { + return false; + } + + ngraph::replace_output_update_name(reshape_node->output(0), reshape_node->input_value(0)); + return true; + }; + + auto m = std::make_shared(pooling, "RemoveExtraReshapes"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.hpp b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.hpp new file mode 100644 index 00000000000..4f189abdba5 --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.hpp 
@@ -0,0 +1,20 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace GNAPluginNS { + +/** + * @brief Removes reshapes before MaxPool which do nothing. Such reshapes can be a result of conversion from IR10 to IR7. + */ +class RemoveExtraReshapes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + RemoveExtraReshapes(); +}; + +} // namespace GNAPluginNS \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp new file mode 100644 index 00000000000..316df2ca9d7 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp @@ -0,0 +1,148 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::vector, // Input Shape + std::pair, // Input Min and Max + size_t // Levels +> fqMaxpoolReorderingParams; + +namespace LayerTestsDefinitions { + +class FQMaxpoolReordering : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { + float inputDataMin = 0.0f; + float inputDataMax = 0.0f; + float inputDataResolution = 1.0f; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + std::vector inputShape; + std::pair inputMinMax; + size_t levels = 0; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param; + + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice << "_"; + for (auto const& configItem : configuration) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); + result << "_inputMinMax=(" << inputMinMax.first << ".." 
<< inputMinMax.second << ")"; + result << "_levels=" << levels; + + return result.str(); + } + + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution); + } + +protected: + void SetUp() override { + InferenceEngine::Precision netPrecision; + + std::vector inputShape; + std::pair inputMinMax; + size_t levels = 0; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto inputLowNode = ngraph::builder::makeConstant(ngPrc, {1}, { inputMinMax.first }); + auto inputHighNode = ngraph::builder::makeConstant(ngPrc, {1}, { inputMinMax.second }); + + auto inputVector = ngraph::builder::makeParams(ngPrc, {inputShape}); + + auto inputFQ = std::make_shared(inputVector[0], + inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels); + + auto filterWeightsNode = ngraph::builder::makeConstant(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f }); + auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector{ 1 }, std::vector{inputDataMin}); + auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector{ 1 }, std::vector{inputDataMax}); + auto convWeightsFQNode = std::make_shared(filterWeightsNode, + convLowNode, convHighNode, convLowNode, convHighNode, levels); + auto convWeightsFQ = std::dynamic_pointer_cast(convWeightsFQNode); + + auto conv = std::make_shared(inputFQ, convWeightsFQ, std::vector{ 1, 1 }, + std::vector{ 0, 0 }, std::vector{ 0, 0 }, + std::vector{ 1, 1 }, + ngraph::op::PadType::VALID); + auto biasesWeightsNode = ngraph::builder::makeConstant(ngPrc, {}, std::vector{ 0.0f }); + auto add = std::make_shared(conv, biasesWeightsNode); + + auto convFQNode = std::make_shared(add, + inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels); + + auto maxpool = ngraph::builder::makePooling(convFQNode, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR, + ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX); + + ngraph::ResultVector results{ std::make_shared(maxpool)}; + function = std::make_shared(results, inputVector, "FQMaxPoolReorder"); + } +}; + +TEST_P(FQMaxpoolReordering, CompareWithRefImpl) { + Run(); +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> configs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + } +}; + +const std::vector> inputShape = { + {1, 1, 1, 1024}, + {1, 8, 1, 168}, +}; + +const std::vector> inputMinMax = { + {-0.5, 0.5}, + {-2, 2}, + {-8, 8} +}; + +const std::vector levels = { + 65535, +}; + +INSTANTIATE_TEST_CASE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputShape), + ::testing::ValuesIn(inputMinMax), + ::testing::ValuesIn(levels)), + FQMaxpoolReordering::getTestCaseName); +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/fq_conv_fq_affine.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/fq_conv_fq_affine.cpp index 28f414ee11d..e48d4ad12c0 100644 --- 
a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/fq_conv_fq_affine.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/fq_conv_fq_affine.cpp @@ -46,10 +46,13 @@ const auto convParams = ::testing::Combine( ::testing::ValuesIn(outputChannels) ); +const std::vector permute = {false, true}; + INSTANTIATE_TEST_CASE_P(smoke_FqConvFqAffineTest, FqConvFqAffineTest, ::testing::Combine( fqParams, convParams, + ::testing::ValuesIn(permute), ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(inputShapes), ::testing::Values(CommonTestUtils::DEVICE_GNA), diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/fq_conv_fq_affine.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/fq_conv_fq_affine.hpp index a8ca812b749..30c014dd498 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/fq_conv_fq_affine.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/fq_conv_fq_affine.hpp @@ -30,6 +30,7 @@ typedef std::tuple< typedef std::tuple< FqSpecificParams, ConvParams, + bool, // Permute after convolution InferenceEngine::Precision, // Net precision InferenceEngine::SizeVector, // Input shapes LayerTestsUtils::TargetDevice, // Device name diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/fq_conv_fq_affine.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/fq_conv_fq_affine.cpp index cd4370d9661..6255b41db01 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/fq_conv_fq_affine.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/fq_conv_fq_affine.cpp @@ -9,11 +9,12 @@ namespace SubgraphTestsDefinitions { std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo obj) { FqSpecificParams fqParams; ConvParams convParams; + bool permute; InferenceEngine::Precision netPrecision; InferenceEngine::SizeVector inputShapes; std::string targetDevice; std::map config; - std::tie(fqParams, convParams, netPrecision, inputShapes, targetDevice, config) = obj.param; + std::tie(fqParams, convParams, permute, netPrecision, inputShapes, targetDevice, config) = obj.param; std::vector levels; std::vector inputArg; @@ -39,17 +40,19 @@ std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo inputShape; std::map config; auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(fqParams, convParams, netPrecision, inputShape, targetDevice, config) = this->GetParam(); + std::tie(fqParams, convParams, permute, netPrecision, inputShape, targetDevice, config) = this->GetParam(); configuration.insert(config.begin(), config.end()); std::vector levels; @@ -100,8 +103,19 @@ void FqConvFqAffineTest::SetUp() { auto heightAfterConv = (convInputShape[2] - kernelShape[0]) / strides[0] + 1; std::vector outFormShapes = {1, outputChannels * widthAfterConv * heightAfterConv }; + ngraph::Output nodeBeforeReshape; + if (permute) { + auto permuteOrder = std::make_shared(ngraph::element::i64, + ngraph::Shape{4}, + ngraph::Shape{{0, 3, 2, 1}}); + auto transpose = std::make_shared(add, permuteOrder); + nodeBeforeReshape = transpose; + } else { + nodeBeforeReshape = add; + } + auto reshapePattern2 = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes); - auto reshape2 = std::make_shared(add, reshapePattern2, false); + auto 
reshape2 = std::make_shared(nodeBeforeReshape, reshapePattern2, false); auto matMulWeightsNode = ngraph::builder::makeConstant(ngPrc, {outFormShapes[1], outFormShapes[1]}, { 1.0f }); auto matMulLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector{ 1 }, std::vector{inputDataMin}); From c99257e1f908e6a9afff3bae4f4f50f944076e27 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Fri, 30 Apr 2021 15:48:18 +0300 Subject: [PATCH 25/73] [IE TESTS] Fix bug with constant, const and parameters in summary (#5463) * [IE TESTS] Fix bug with constant, const and parameters in summary * Remove lambda --- .../src/layer_test_utils/summary.cpp | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/src/layer_test_utils/summary.cpp b/inference-engine/tests/ie_test_utils/functional_test_utils/src/layer_test_utils/summary.cpp index 1f203eb8622..522f882fb25 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/src/layer_test_utils/summary.cpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/src/layer_test_utils/summary.cpp @@ -114,25 +114,31 @@ std::map Summary::getOpStatisticFromReport() { } void Summary::updateOPsStats(const std::shared_ptr &function, const PassRate::Statuses &status) { + bool isFunctionalGraph = false; for (const auto &op : function->get_ordered_ops()) { - if (ngraph::is_type(op) || + if (!ngraph::is_type(op) && + !ngraph::is_type(op) && + !ngraph::is_type(op)) { + isFunctionalGraph = true; + break; + } + } + + for (const auto &op : function->get_ordered_ops()) { + if ((ngraph::is_type(op) || ngraph::is_type(op) || - ngraph::is_type(op)) { + ngraph::is_type(op)) && isFunctionalGraph) { continue; } else if (ngraph::is_type(op)) { updateOPsStats(op->get_type_info(), status); auto ti = ngraph::as_type_ptr(op); auto ti_body = ti->get_function(); - for (const auto &ti_op : ti_body->get_ordered_ops()) { - updateOPsStats(ti_op->get_type_info(), status); - } + updateOPsStats(ti_body, status); } else if (ngraph::is_type(op)) { updateOPsStats(op->get_type_info(), status); auto loop = ngraph::as_type_ptr(op); auto loop_body = loop->get_function(); - for (const auto &loop_op : loop_body->get_ordered_ops()) { - updateOPsStats(loop_op->get_type_info(), status); - } + updateOPsStats(loop_body, status); } else { updateOPsStats(op->get_type_info(), status); } From a7353f4b2817f3c4528d9f6ac13078ef507373ce Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Fri, 30 Apr 2021 15:53:14 +0200 Subject: [PATCH 26/73] Revision of Sqrt op and reference implementation (#5446) * Add NGRAPH_RTTI * Instantiate unary ops type prop tests for Sqrt * Add sqrt backend tests * Add rounding for int types to Sqrt reference implementation --- ngraph/core/include/ngraph/op/sqrt.hpp | 4 +- .../include/ngraph/runtime/reference/sqrt.hpp | 13 ++- ngraph/core/src/op/sqrt.cpp | 2 +- ngraph/test/backend/sqrt.in.cpp | 100 ++++++++++++------ ngraph/test/type_prop/unary_ops.cpp | 2 +- 5 files changed, 84 insertions(+), 37 deletions(-) diff --git a/ngraph/core/include/ngraph/op/sqrt.hpp b/ngraph/core/include/ngraph/op/sqrt.hpp index ea601e969c9..ca135616b60 100644 --- a/ngraph/core/include/ngraph/op/sqrt.hpp +++ b/ngraph/core/include/ngraph/op/sqrt.hpp @@ -30,8 +30,8 @@ namespace ngraph class NGRAPH_API Sqrt : public util::UnaryElementwiseArithmetic { public: - static constexpr NodeTypeInfo type_info{"Sqrt", 0}; - const NodeTypeInfo& get_type_info() const override { return type_info; } + 
NGRAPH_RTTI_DECLARATION; + /// \brief Constructs a square operation. /// /// \param arg Node that produces the input tensor. diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/sqrt.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/sqrt.hpp index bcafec998d3..6a499cb4f61 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/sqrt.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/sqrt.hpp @@ -6,6 +6,7 @@ #include #include +#include namespace ngraph { @@ -14,13 +15,23 @@ namespace ngraph namespace reference { template - void sqrt(const T* arg, T* out, size_t count) + typename std::enable_if::value>::type + sqrt(const T* arg, T* out, size_t count) { for (size_t i = 0; i < count; i++) { out[i] = std::sqrt(arg[i]); } } + template + typename std::enable_if::value>::type + sqrt(const T* arg, T* out, size_t count) + { + for (size_t i = 0; i < count; i++) + { + out[i] = static_cast(std::round(std::sqrt(arg[i]))); + } + } } // namespace reference } // namespace runtime } // namespace ngraph diff --git a/ngraph/core/src/op/sqrt.cpp b/ngraph/core/src/op/sqrt.cpp index d1a1fca015b..80e36b7775e 100644 --- a/ngraph/core/src/op/sqrt.cpp +++ b/ngraph/core/src/op/sqrt.cpp @@ -14,7 +14,7 @@ using namespace std; using namespace ngraph; -constexpr NodeTypeInfo op::Sqrt::type_info; +NGRAPH_RTTI_DEFINITION(op::v0::Sqrt, "Sqrt", 0, util::UnaryElementwiseArithmetic); op::Sqrt::Sqrt(const Output& arg) : UnaryElementwiseArithmetic(arg) diff --git a/ngraph/test/backend/sqrt.in.cpp b/ngraph/test/backend/sqrt.in.cpp index d1b3f0061e4..bd959c958ac 100644 --- a/ngraph/test/backend/sqrt.in.cpp +++ b/ngraph/test/backend/sqrt.in.cpp @@ -20,56 +20,92 @@ // clang-format on #include "gtest/gtest.h" -#include "runtime/backend.hpp" -#include "ngraph/runtime/tensor.hpp" #include "ngraph/ngraph.hpp" -#include "util/all_close.hpp" -#include "util/all_close_f.hpp" -#include "util/ndarray.hpp" +#include "util/test_case.hpp" #include "util/test_control.hpp" #include "util/test_tools.hpp" +#include "util/engine/test_engines.hpp" using namespace std; using namespace ngraph; static string s_manifest = "${MANIFEST}"; +using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); -NGRAPH_TEST(${BACKEND_NAME}, sqrt) +NGRAPH_TEST(${BACKEND_NAME}, sqrt_basic) { Shape shape{2, 3}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A), ParameterVector{A}); + auto input_param = make_shared(element::f32, shape); + auto function = + make_shared(make_shared(input_param), ParameterVector{input_param}); - auto backend = runtime::Backend::create("${BACKEND_NAME}"); + std::vector input_data{16, 4, 81, 100, 10000, 0}; + std::vector expected_result{4, 2, 9, 10, 100, 0}; - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{16, 4, 81, 100, 10000, 0}); - auto result = backend->create_tensor(element::f32, shape); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_TRUE(test::all_close_f(vector{4, 2, 9, 10, 100, 0}, read_vector(result))); + auto test_case = test::TestCase(function); + test_case.add_input(input_data); + test_case.add_expected_output(shape, expected_result); + test_case.run(); } NGRAPH_TEST(${BACKEND_NAME}, sqrt_negative_inputs) { Shape shape{4}; - auto A = make_shared(element::f32, shape); - auto f = make_shared(make_shared(A), ParameterVector{A}); + auto input_param = make_shared(element::f32, shape); + auto function = + 
make_shared(make_shared(input_param), ParameterVector{input_param}); - auto backend = runtime::Backend::create("${BACKEND_NAME}"); + std::vector input_data{-1, 4, -81, 100}; + std::vector expected_result{NAN, 2, NAN, 10}; - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape); - copy_data(a, vector{-1, 4, -81, 100}); - auto result = backend->create_tensor(element::f32, shape); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - auto result_val = read_vector(result); - EXPECT_TRUE(isnan(result_val[0])); - EXPECT_FLOAT_EQ(result_val[1], std::sqrt(4)); - EXPECT_TRUE(isnan(result_val[2])); - EXPECT_FLOAT_EQ(result_val[3], std::sqrt(100)); + auto test_case = test::TestCase(function); + test_case.add_input(input_data); + test_case.add_expected_output(shape, expected_result); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, sqrt_integral_inputs) +{ + Shape shape{2, 7}; + auto input_param = make_shared(element::i32, shape); + auto function = + make_shared(make_shared(input_param), ParameterVector{input_param}); + + std::vector input_data{4, 7, 9, 10, 80, 55, 6, 1, 23, 233, 256, 474, 1024, 110889}; + std::vector expected_result{2, 3, 3, 3, 9, 7, 2, 1, 5, 15, 16, 22, 32, 333}; + + auto test_case = test::TestCase(function); + test_case.add_input(input_data); + test_case.add_expected_output(shape, expected_result); + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, sqrt_floating_inputs) +{ + Shape shape{2, 7}; + auto input_param = make_shared(element::f32, shape); + auto function = + make_shared(make_shared(input_param), ParameterVector{input_param}); + + std::vector input_data{ + 4, 7, 9, 10, 80, 55, 6.25, 0.9, 23.33, 233, 256, 473.7891, 1024, 111108.88}; + std::vector expected_result{2., + 2.6457512, + 3., + 3.1622777, + 8.944272, + 7.4161983, + 2.5, + 0.94868326, + 4.830114, + 15.264338, + 16., + 21.766697, + 32., + 333.33}; + + auto test_case = test::TestCase(function); + test_case.add_input(input_data); + test_case.add_expected_output(shape, expected_result); + test_case.run(); } diff --git a/ngraph/test/type_prop/unary_ops.cpp b/ngraph/test/type_prop/unary_ops.cpp index c0b500bf717..f9c2160d2af 100644 --- a/ngraph/test/type_prop/unary_ops.cpp +++ b/ngraph/test/type_prop/unary_ops.cpp @@ -96,6 +96,6 @@ REGISTER_TYPED_TEST_CASE_P(UnaryOperator, dynamic_rank_input_shape_3D, dynamic_rank_input_shape_full); -using Types = ::testing::Types; +using Types = ::testing::Types; INSTANTIATE_TYPED_TEST_CASE_P(type_prop, UnaryOperator, Types); From bb022e2d26088c69a39baeac467a18e3e7cdbe51 Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Fri, 30 Apr 2021 19:17:48 +0300 Subject: [PATCH 27/73] Added test for opset7::Gather (#5373) --- .../include/single_layer_tests/gather.hpp | 4 ++ .../single_layer/gather.hpp | 22 ++++++++++ .../src/single_layer/gather.cpp | 42 +++++++++++++++++++ .../include/ngraph_functions/builders.hpp | 1 + 4 files changed, 69 insertions(+) diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/gather.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/gather.hpp index d5aaa661dbd..49191b69553 100644 --- a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/gather.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/gather.hpp @@ -12,4 +12,8 @@ TEST_P(GatherLayerTest, CompareWithRefs) { Run(); }; +TEST_P(Gather7LayerTest, CompareWithRefs) { + Run(); +}; + } // namespace LayerTestsDefinitions 
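For reference, a minimal standalone sketch (not part of this patch) of how an opset7 Gather with the batch_dims argument is constructed; it mirrors what Gather7LayerTest::SetUp builds in the diff below, but the concrete shapes, index values and axis used here are illustrative assumptions only.

    #include <memory>
    #include <ngraph/ngraph.hpp>
    #include <ngraph/opsets/opset7.hpp>

    // Gathers from data[2, 4, 5] along axis 1 with batch_dims = 1:
    // each of the 2 batches selects its own 2 rows, so the output shape is [2, 2, 5].
    std::shared_ptr<ngraph::Function> makeGather7Sketch() {
        using namespace ngraph;
        auto data = std::make_shared<opset7::Parameter>(element::f32, Shape{2, 4, 5});
        auto indices = opset7::Constant::create(element::i64, Shape{2, 2}, {0, 3, 1, 2});
        auto axis = opset7::Constant::create(element::i64, Shape{}, {1});
        const int64_t batch_dims = 1;
        auto gather = std::make_shared<opset7::Gather>(data, indices, axis, batch_dims);
        return std::make_shared<Function>(OutputVector{gather}, ParameterVector{data});
    }

The new test below builds the same structure, except that the input shape, indices shape, axis and batch index all come from the test parameters and the indices are generated randomly within the valid range.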
diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/gather.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/gather.hpp index 071b9e2cb02..5fd19bbacda 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/gather.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/gather.hpp @@ -41,4 +41,26 @@ protected: void SetUp() override; }; + +typedef std::tuple< + std::vector, // Input shapes + std::vector, // Indices shape + std::tuple, // Gather axis and batch + InferenceEngine::Precision, // Network precision + InferenceEngine::Precision, // Input precision + InferenceEngine::Precision, // Output precision + InferenceEngine::Layout, // Input layout + InferenceEngine::Layout, // Output layout + std::string // Device name +> gather7ParamsTuple; + +class Gather7LayerTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + } // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/gather.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/gather.cpp index 77aca8d652e..9f57e1d1be1 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/gather.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/gather.cpp @@ -51,4 +51,46 @@ void GatherLayerTest::SetUp() { GatherLayerTestBase::SetUp(GetParam()); } +std::string Gather7LayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + std::tuple axis_batchIdx; + std::vector indices; + std::vector indicesShape, inputShape; + InferenceEngine::Precision netPrecision; + InferenceEngine::Precision inPrc, outPrc; + InferenceEngine::Layout inLayout, outLayout; + std::string targetName; + std::tie(inputShape, indicesShape, axis_batchIdx, netPrecision, inPrc, outPrc, inLayout, outLayout, targetName) = obj.param; + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_"; + result << "axis=" << std::get<0>(axis_batchIdx) << "_"; + result << "batchIdx=" << std::get<1>(axis_batchIdx) << "_"; + result << "indicesShape=" << CommonTestUtils::vec2str(indicesShape) << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "inPRC=" << inPrc.name() << "_"; + result << "outPRC=" << outPrc.name() << "_"; + result << "inL=" << inLayout << "_"; + result << "outL=" << outLayout << "_"; + result << "trgDev=" << targetName << "_"; + return result.str(); +} + +void Gather7LayerTest::SetUp() { + std::tuple axis_batchIdx; + std::vector indicesShape; + std::vector inputShape; + InferenceEngine::Precision netPrecision; + std::tie(inputShape, indicesShape, axis_batchIdx, netPrecision, inPrc, outPrc, inLayout, outLayout, targetDevice) = GetParam(); + int axis = std::get<0>(axis_batchIdx); + int batchIdx = std::get<1>(axis_batchIdx); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto functionParams = ngraph::builder::makeParams(ngPrc, { inputShape }); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(functionParams)); + auto indicesNode = ngraph::builder::makeConstant(ngraph::element::i64, indicesShape, {}, true, + 
inputShape[axis < 0 ? axis + inputShape.size() : axis] - 1, 0); + auto axisNode = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({}), { axis }); + auto gather = std::make_shared(paramOuts[0], indicesNode, axisNode, batchIdx); + ngraph::ResultVector results{ std::make_shared(gather) }; + function = std::make_shared(results, functionParams, "gather"); +} + } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp index b76b1561dbf..292776c307b 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include "ngraph_functions/utils/data_utils.hpp" From 7d2ec02d65c5326dbd54b0a8a322970e32884ae3 Mon Sep 17 00:00:00 2001 From: Alexey Varyzgin Date: Mon, 3 May 2021 13:29:38 +0300 Subject: [PATCH 28/73] [INT8][BF16] INT8 + BF16 feature was enabled (#5059) --- inference-engine/src/mkldnn_plugin/config.cpp | 7 +++-- inference-engine/src/mkldnn_plugin/config.h | 1 + .../src/mkldnn_plugin/mkldnn_exec_network.cpp | 13 ++++++--- .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 29 +++++++++++++++++++ .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp | 5 ++++ .../nodes/mkldnn_fullyconnected_node.cpp | 11 +++++++ .../nodes/mkldnn_pooling_node.cpp | 3 ++ 7 files changed, 63 insertions(+), 6 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/config.cpp b/inference-engine/src/mkldnn_plugin/config.cpp index 16eb4c9e130..e16d7e6ccde 100644 --- a/inference-engine/src/mkldnn_plugin/config.cpp +++ b/inference-engine/src/mkldnn_plugin/config.cpp @@ -98,12 +98,15 @@ void Config::readProperties(const std::map &prop) { dumpQuantizedGraphToIr = val; } else if (key == PluginConfigParams::KEY_ENFORCE_BF16) { if (val == PluginConfigParams::YES) { - if (with_cpu_x86_avx512_core()) + if (with_cpu_x86_avx512_core()) { enforceBF16 = true; - else + manualEnforceBF16 = true; + } else { IE_THROW() << "Platform doesn't support BF16 format"; + } } else if (val == PluginConfigParams::NO) { enforceBF16 = false; + manualEnforceBF16 = false; } else { IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16 << ". Expected only YES/NO"; diff --git a/inference-engine/src/mkldnn_plugin/config.h b/inference-engine/src/mkldnn_plugin/config.h index 94f9a77835f..0be5261549f 100644 --- a/inference-engine/src/mkldnn_plugin/config.h +++ b/inference-engine/src/mkldnn_plugin/config.h @@ -34,6 +34,7 @@ struct Config { #else LPTransformsMode lpTransformsMode = LPTransformsMode::On; bool enforceBF16 = true; + bool manualEnforceBF16 = false; #endif void readProperties(const std::map &config); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp index f3acd6d5a71..8122cf1054f 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp @@ -52,8 +52,6 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, bool isFloatModel = true; if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) { // Check if network is INT8 or Binary. 
- // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution: - // BF16 + INT8 or BF16 + BIN. CNNNetworkIterator iter(network); while (iter != CNNNetworkIterator()) { if (CaselessEq()((*iter)->type, "FakeQuantize")) { @@ -87,12 +85,19 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, } }; - if (with_cpu_x86_avx512_core() && isFloatModel) { + if (with_cpu_x86_avx512_core()) { // If enforceBF16 flag was set, BF16 transformation applies for all layers supported by CPU plugin. // Otherwise, only layers marked as BF16 in '_clonedNetwork' will be performed in bfloat16 mode. // CPU plugin throws an exception, if marked as BF16 layers have not supported by CPU plugin. - if (cfg.enforceBF16 == true) + + // BF16 + INT8 or BF16 + BIN models will be performed in mixed precision execution only if + // enforceBF16 flag was set manually + if (isFloatModel == false) { + if (cfg.manualEnforceBF16 == true) + changePrecisionBF16(Precision::FP32, Precision::BF16); + } else if (cfg.enforceBF16 == true) { changePrecisionBF16(Precision::FP32, Precision::BF16); + } } else { changePrecisionBF16(Precision::BF16, Precision::FP32); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 0976b5f1961..781120774ba 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -664,6 +664,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) { } } +static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + return childNode->getType() == Quantize && + one_of(Precision::BF16, + parentNode->getCnnLayer()->precision, + childNode->getCnnLayer()->precision, + parentNode->getCnnLayer()->outData[0].get()->getPrecision(), + childNode->getCnnLayer()->outData[0].get()->getPrecision()); +} + void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); @@ -754,6 +763,12 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra continue; } + // BF16 Quantize Layer Fusing Disabling + if (BF16QuantizeNodeFusing(parentNode, childNode)) { + parent++; + continue; + } + parentNode->fuseWith(childNode); if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { @@ -1011,6 +1026,10 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndQuantize(MKLDNNGraph &graph) { auto child = parent->getChildEdgeAt(0)->getChild(); if (!isSutableChildNode(child)) continue; + // BF16 Quantize Layer Fusing Disabling + if (BF16QuantizeNodeFusing(parent, child)) + continue; + parent->fuseWith(child); auto parents = child->parentEdges; @@ -1073,6 +1092,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) continue; } + // BF16 Quantize Layer Fusing Disabling + if (BF16QuantizeNodeFusing(parentNode, childNode)) { + parent++; + continue; + } + parentNode->fuseWith(childNode); if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { @@ -1117,6 +1142,10 @@ void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) auto child = parent->getChildEdgeAt(0)->getChild(); if (!isSutableChildNode(parent, child)) continue; + // BF16 Quantize Layer Fusing Disabling + if (BF16QuantizeNodeFusing(parent, child)) + continue; + parent->fuseWith(child); auto parents = child->parentEdges; diff --git 
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index d6106b68165..d0411382b2a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -278,6 +278,11 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { MKLDNNMemoryDesc in_candidate, out_candidate; if (canBeExecutedInInt8()) { + // We have to extend convolution_x8s8s32x from oneDNN to support BF16 output data type + if (outputDataType == memory::data_type::bf16) + outputDataType = memory::data_type::f32; + if (eltwisePrecision == Precision::BF16) + eltwisePrecision = Precision::FP32; in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index 1d16892181c..46e01398166 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -71,6 +71,7 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { } auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->insData[1].lock()->getPrecision()); + // We have to extend gemm_x8s8s32x_inner_product_fwd_t from oneDNN to support BF16 output data type if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8) && inputDataType != memory::data_type::bf16) { inputDataType = memory::data_type::f32; @@ -78,6 +79,16 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { } } + if (one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) + && outputDataType == memory::data_type::bf16) { + outputDataType = memory::data_type::f32; + } + + if (inputDataType == memory::data_type::bf16 + && one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) { + outputDataType = memory::data_type::bf16; + } + auto * fcLayer = dynamic_cast(getCnnLayer().get()); if (fcLayer == nullptr) IE_THROW() << "Cannot convert fully connected layer."; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index 0693453ed73..2f5c459fdd8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -104,6 +104,9 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { effective_pad_end[i] = (dst - calc_dst) * stride[i]; } if (inputPrecision == Precision::I8 || inputPrecision == Precision::U8) { + // We have to extend i8i8_pooling_fwd_t from oneDNN to support BF16 output data type + if (outputDataType == memory::data_type::bf16) + outputDataType = memory::data_type::f32; // i8 layers supports only ndhwc and nhwc layouts MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, parentDims.ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc}; MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? 
memory::format_tag::ndhwc : memory::format_tag::nhwc}; From 0808975a379bc2d7aa497af501203f38a5c78e8f Mon Sep 17 00:00:00 2001 From: Chenhu Wang Date: Mon, 3 May 2021 19:48:42 +0800 Subject: [PATCH 29/73] [CPU] windows_Interpolate_fused-FQ_nearest-mode_nspc-layout_fix (#5317) --- .../mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp | 11 +++++++++-- .../plugin/cpu/single_layer_tests/interpolate.cpp | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index b87c33b7320..935bf3bae61 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -386,6 +386,13 @@ private: Xbyak::Label out_loop_label; Xbyak::Label out_loop_end; + Xbyak::Reg64 reg_work_amount_bk = reg_src_aux2; + Xbyak::Reg64 reg_oc_off_bk = rsi; + mov(reg_work_amount_bk, ptr[reg_params + GET_OFF(work_amount)]); + if (attr_.post_ops_.len() != 0) { + mov(reg_oc_off_bk, ptr[reg_params + GET_OFF(oc_off)]); + } + Xbyak::Reg64 reg_work_amount_out = reg_src_aux1; mov(reg_work_amount_out, jcp_.OW); L(out_loop_label); @@ -410,9 +417,9 @@ private: mov(reg_index_offset, dword[reg_index]); add(reg_src_aux, reg_index_offset); - mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); + mov(reg_work_amount, reg_work_amount_bk); if (attr_.post_ops_.len() != 0) - mov(reg_oc_off, ptr[reg_params + GET_OFF(oc_off)]); + mov(reg_oc_off, reg_oc_off_bk); L(nn_loop_label); { diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp index a381fcf07be..ec57da59409 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp @@ -236,6 +236,7 @@ const std::vector interpolateFusingParamsSet{ emptyFusingSpec, fusingRelu, fusingSwish, + fusingFakeQuantizePerChannelRelu, }; std::vector> filterAdditionalConfig() { From 5d8f209df6a47edc3722ed2e4ea55ce3fac3f5b2 Mon Sep 17 00:00:00 2001 From: Aleksandr Pertovsky Date: Mon, 3 May 2021 15:01:05 +0300 Subject: [PATCH 30/73] [CPU] Add Roll support (#5112) --- .../src/mkldnn_plugin/mkldnn_node.cpp | 1 + .../src/mkldnn_plugin/mkldnn_node.h | 5 +- .../mkldnn_plugin/nodes/mkldnn_roll_node.cpp | 209 ++++++++++++++++++ .../mkldnn_plugin/nodes/mkldnn_roll_node.h | 41 ++++ .../single_layer_tests/roll.cpp | 96 ++++++++ .../include/single_layer_tests/roll.hpp | 15 ++ .../shared_test_classes/single_layer/roll.hpp | 30 +++ .../src/single_layer/roll.cpp | 46 ++++ .../include/ngraph_functions/builders.hpp | 4 + .../ngraph_functions/src/roll.cpp | 17 ++ ngraph/python/tests/test_ngraph/test_roll.py | 2 - 11 files changed, 463 insertions(+), 3 deletions(-) create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/single_layer_tests/roll.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/roll.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/src/single_layer/roll.cpp create 
mode 100644 inference-engine/tests/ngraph_helpers/ngraph_functions/src/roll.cpp diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index d3af44347ad..f446c339d39 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -143,6 +143,7 @@ static const InferenceEngine::details::caseless_unordered_map { "ReduceSum", ReduceSum}, { "ReduceSumSquare", ReduceSumSquare}, { "Erf", Eltwise }, + { "Roll", Roll }, }; Type TypeFromName(const std::string type) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 169bde711c8..483c315e955 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -87,7 +87,8 @@ enum Type { ReduceOr, ReduceProd, ReduceSum, - ReduceSumSquare + ReduceSumSquare, + Roll }; Type TypeFromName(const std::string type); @@ -206,6 +207,8 @@ static std::string NameFromType(Type type) { return "ReduceSum"; case ReduceSumSquare: return "ReduceSumSquare"; + case Roll: + return "Roll"; default: return "Unknown"; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp new file mode 100644 index 00000000000..aa1d6623463 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp @@ -0,0 +1,209 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include "mkldnn_roll_node.h" +#include "ie_parallel.hpp" +#include "ie_precision.hpp" +#include "mkldnn/ie_mkldnn.h" +#include "utils/general_utils.h" +#include "common/cpu_memcpy.h" + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +MKLDNNRollNode::MKLDNNRollNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(layer, eng, cache) { + layerErrorPrefix = "Roll layer with name '" + layer->name + "'"; + if (layer->insData.size() != numberOfInputs) { + IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!"; + } + + /* Data */ + auto data = layer->insData[DATA_INDEX].lock(); + if (data == nullptr) { + IE_THROW() << layerErrorPrefix << " has nullable data"; + } + + const auto &dataTensor = data->getTensorDesc(); + shape = dataTensor.getDims(); + const auto &dataPrecision = dataTensor.getPrecision(); + + if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == supportedPrecisionSizes.end()) + IE_THROW() << layerErrorPrefix << "has unsupported precision: " << dataPrecision.name(); + + if (shape.size() < 1) { + IE_THROW() << layerErrorPrefix << " doesn't support 'data' input tensor with rank: " << shape.size(); + } + numOfDims = shape.size(); + + if (shape != layer->outData[0]->getTensorDesc().getDims()) { + IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions"; + } + + /* Axes */ + auto axesData = layer->insData[AXES_INDEX].lock(); + if (axesData == nullptr) { + IE_THROW() << layerErrorPrefix << " has nullable 'axes' data"; + } + const auto& axesTensor = axesData->getTensorDesc(); + const auto& axesTensorPrec = axesData->getTensorDesc().getPrecision(); + if (axesTensorPrec != Precision::I32 && axesTensorPrec != Precision::I64) { + IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input 
precision: " << axesTensorPrec.name(); + } + + const auto axesTensorRank = axesTensor.getDims().size(); + if (axesTensorRank > 1) { + IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank; + } + + /* Shift */ + auto shiftData = layer->insData[SHIFT_INDEX].lock(); + if (shiftData == nullptr) { + IE_THROW() << layerErrorPrefix << " has nullable 'shift' data"; + } + const auto& shiftTensor = shiftData->getTensorDesc(); + const auto& shiftTensorPrec = shiftData->getTensorDesc().getPrecision(); + if (shiftTensorPrec != Precision::I32 && shiftTensorPrec != Precision::I64) { + IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name(); + } + + const auto shiftTensorRank = shiftTensor.getDims().size(); + if (shiftTensorRank > 1) { + IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank; + } +} +void MKLDNNRollNode::getSupportedDescriptors() {} + +void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + auto inputData = getCnnLayer()->insData[0].lock(); + + if (inputData == nullptr) { + IE_THROW() << layerErrorPrefix << " has nullable 'data'"; + } + + InferenceEngine::Precision precision = inputData->getPrecision(); + + auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + + auto srcDims = getParentEdgeAt(0)->getDims(); + + auto dataMemoryFormat = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims()); + InferenceEngine::LayerConfig config; + config.dynBatchSupport = false; + + auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig { + InferenceEngine::DataConfig dataConfig; + dataConfig.inPlace = -1; + dataConfig.constant = false; + dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims)); + return dataConfig; + }; + + config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), dataType)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), memory::data_type::s32)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getDims(), memory::data_type::s32)); + + config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), dataType)); + + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, dataMemoryFormat}); +} + + +void MKLDNNRollNode::execute(mkldnn::stream strm) { + const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getDesc().getPrecision(); + const auto& dataTypeSize = dataPrecision.size(); + switch (dataTypeSize) { + case sizeof(PrecisionTrait::value_type): { + rollImpl::value_type>(); + break; + } + case sizeof(PrecisionTrait::value_type): { + rollImpl::value_type>(); + break; + } + case sizeof(PrecisionTrait::value_type): { + rollImpl::value_type>(); + break; + } + default: + IE_THROW() << layerErrorPrefix << "has unsupported 'data' input precision: " << dataPrecision.name(); + } +} + +size_t MKLDNNRollNode::calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize) { + size_t pos = dataOffset / segmentSize % dimSize; + size_t shift = (pos + dimShift) % dimSize - pos; + return dataOffset + shift * segmentSize; +} + +template +void MKLDNNRollNode::rollImpl() { + const auto dataEdge = getParentEdgeAt(DATA_INDEX); + const auto axesEdge = getParentEdgeAt(AXES_INDEX); + const auto shiftsEdge = getParentEdgeAt(SHIFT_INDEX); + + const auto *axes = 
reinterpret_cast(axesEdge->getMemoryPtr()->GetPtr()); + const auto *shifts = reinterpret_cast(shiftsEdge->getMemoryPtr()->GetPtr()); + + const auto *input = reinterpret_cast(dataEdge->getMemoryPtr()->GetPtr()); + auto *output = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + std::vector shiftsVector(numOfDims, 0); + + const size_t axesLength = axesEdge->getDims()[0]; + for (size_t dim = 0; dim < axesLength ; ++dim) { + int32_t currentAxis = axes[dim] < 0 ? axes[dim] + numOfDims : axes[dim]; + int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim]; + int32_t dimSize = shape[currentAxis]; + shiftsVector[currentAxis] = (shiftSum % dimSize + dimSize) % dimSize; + } + + const size_t blockSize = shape.back(); + const size_t totalElements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + const size_t leftBlockSize = blockSize - shiftsVector.back(); + const size_t rightBlockSize = blockSize - leftBlockSize; + const size_t elementSize = sizeof(DataType); + + const size_t nIterations = totalElements / blockSize; + const auto strides = dataEdge->getDesc().getBlockingDesc().getStrides(); + parallel_for(nIterations, [&](size_t iter) { + size_t start = iter * blockSize; + size_t leftBlockStartOffset = start; + size_t rightBlockStartOffset = start + leftBlockSize; + + for (int dim = numOfDims - 1; dim >= 0; --dim) { + leftBlockStartOffset = calculateShiftOffset(leftBlockStartOffset, shiftsVector[dim], strides[dim], shape[dim]); + rightBlockStartOffset = calculateShiftOffset(rightBlockStartOffset, shiftsVector[dim], strides[dim], shape[dim]); + } + + if (leftBlockSize > 0) + cpu_memcpy(output + leftBlockStartOffset, + input + start, + leftBlockSize * elementSize); + + + if (rightBlockSize > 0) + cpu_memcpy(output + rightBlockStartOffset, + input + (start + leftBlockSize), + rightBlockSize * elementSize); + }); +} + +bool MKLDNNRollNode::created() const { + return getType() == Roll; +} + +void MKLDNNRollNode::createPrimitive() {} + +const std::vector MKLDNNRollNode::supportedPrecisionSizes = {1, 2, 4}; + +REG_MKLDNN_PRIM_FOR(MKLDNNRollNode, Roll) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h new file mode 100644 index 00000000000..019d65f6332 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNRollNode : public MKLDNNNode { +public: + MKLDNNRollNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNRollNode() override = default; + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override; + void execute(mkldnn::stream strm) override; + bool created() const override; + +private: + size_t calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize); + + template + void rollImpl(); + + std::vector shape; + const static std::vector supportedPrecisionSizes; + std::string layerErrorPrefix; + size_t numOfDims; + + const size_t DATA_INDEX = 0ul; + const size_t SHIFT_INDEX = 1ul; + const size_t AXES_INDEX = 2ul; + const size_t numberOfInputs = 3ul; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp 
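
For reference, the semantics the new Roll node implements can be checked against a few lines of standalone C++. The sketch below is illustrative only and assumes a dense row-major tensor; normalize_shifts and roll_reference are invented names that do not exist in the plugin. It mirrors the two steps of rollImpl: fold the (possibly negative or repeated) axes and shifts into one non-negative shift per dimension, then move every element to its shifted coordinate. The node itself avoids the per-element index arithmetic by splitting each innermost block into a left and a right segment and copying them with cpu_memcpy.

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Fold repeated and negative axes/shifts into one non-negative shift per dimension,
    // the same normalization the node performs before copying segments.
    static std::vector<size_t> normalize_shifts(const std::vector<int64_t>& shifts,
                                                const std::vector<int64_t>& axes,
                                                const std::vector<size_t>& shape) {
        std::vector<size_t> result(shape.size(), 0);
        for (size_t i = 0; i < axes.size(); ++i) {
            const int64_t rank = static_cast<int64_t>(shape.size());
            const int64_t axis = axes[i] < 0 ? axes[i] + rank : axes[i];
            const int64_t dim = static_cast<int64_t>(shape[axis]);
            const int64_t sum = static_cast<int64_t>(result[axis]) + shifts[i];
            result[axis] = static_cast<size_t>(((sum % dim) + dim) % dim);
        }
        return result;
    }

    // Plain element-wise reference: the element at coordinate c moves to (c + shift) mod dim
    // along every axis. Assumes a dense row-major layout.
    static std::vector<float> roll_reference(const std::vector<float>& input,
                                             const std::vector<size_t>& shape,
                                             const std::vector<size_t>& shifts) {
        std::vector<size_t> strides(shape.size(), 1);
        for (int d = static_cast<int>(shape.size()) - 2; d >= 0; --d)
            strides[d] = strides[d + 1] * shape[d + 1];

        std::vector<float> output(input.size());
        for (size_t flat = 0; flat < input.size(); ++flat) {
            size_t rem = flat;
            size_t dst = 0;
            for (size_t d = 0; d < shape.size(); ++d) {
                const size_t coord = rem / strides[d];
                rem %= strides[d];
                dst += ((coord + shifts[d]) % shape[d]) * strides[d];
            }
            output[dst] = input[flat];
        }
        return output;
    }

    int main() {
        const std::vector<size_t> shape{2, 5};
        std::vector<float> data(10);
        std::iota(data.begin(), data.end(), 0.0f);                    // 0..9 viewed as a 2x5 tensor
        const auto shifts = normalize_shifts({1, 2}, {0, 1}, shape);  // same result for axes {-2, -1}
        for (float v : roll_reference(data, shape, shifts))
            std::cout << v << ' ';                                    // 8 9 5 6 7 3 4 0 1 2
        std::cout << '\n';
        return 0;
    }

Rolling the 2x5 range 0..9 by one row and two columns yields 8 9 5 6 7 3 4 0 1 2, as noted in the comment.
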
b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp new file mode 100644 index 00000000000..f47d29704de --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp @@ -0,0 +1,96 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/roll.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector inputPrecision = { + InferenceEngine::Precision::I8, + InferenceEngine::Precision::U8, + InferenceEngine::Precision::I16, + InferenceEngine::Precision::I32, + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::BF16 +}; + +const auto testCase2DZeroShifts = ::testing::Combine( + ::testing::Values(std::vector{17, 19}), // Input shape + ::testing::ValuesIn(inputPrecision), // Precision + ::testing::Values(std::vector{0, 0}), // Shift + ::testing::Values(std::vector{0, 1}), // Axes + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCase1D = ::testing::Combine( + ::testing::Values(std::vector{16}), // Input shape + ::testing::ValuesIn(inputPrecision), // Precision + ::testing::Values(std::vector{5}), // Shift + ::testing::Values(std::vector{0}), // Axes + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCase2D = ::testing::Combine( + ::testing::Values(std::vector{600, 450}), // Input shape + ::testing::ValuesIn(inputPrecision), // Precision + ::testing::Values(std::vector{300, 250}), // Shift + ::testing::Values(std::vector{0, 1}), // Axes + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCase3D = ::testing::Combine( + ::testing::Values(std::vector{2, 320, 320}), // Input shape + ::testing::ValuesIn(inputPrecision), // Precision + ::testing::Values(std::vector{160, 160}), // Shift + ::testing::Values(std::vector{1, 2}), // Axes + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCaseNegativeUnorderedAxes4D = ::testing::Combine( + ::testing::Values(std::vector{3, 11, 6, 4}), // Input shape + ::testing::ValuesIn(inputPrecision), // Precision + ::testing::Values(std::vector{7, 3}), // Shift + ::testing::Values(std::vector{-3, -2}), // Axes + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCaseRepeatingAxes5D = ::testing::Combine( + ::testing::Values(std::vector{2, 16, 32, 32}), // Input shape + ::testing::ValuesIn(inputPrecision), // Precision + ::testing::Values(std::vector{16, 15, 10, 2, 1, 7, 2, 8, 1, 1}), // Shift + ::testing::Values(std::vector{-1, -2, -3, 1, 0, 3, 3, 2, -2, -3}), // Axes + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCaseNegativeShifts6D = ::testing::Combine( + ::testing::Values(std::vector{4, 16, 3, 6, 5, 2}), // Input shape + ::testing::ValuesIn(inputPrecision), // Precision + ::testing::Values(std::vector{-2, -15, -2, -1, -4, -1}), // Shift + ::testing::Values(std::vector{0, 1, 2, 3, 4, 5}), // Axes + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCaseUnordNegAxesAndShifts10D = ::testing::Combine( + ::testing::Values(std::vector{2, 2, 4, 2, 3, 6, 3, 2, 3, 2}), // Input shape + ::testing::ValuesIn(inputPrecision), // Precision + ::testing::Values(std::vector{-2, -1, 1, 1, 1, -2}), // Shift + ::testing::Values(std::vector{-6, -4, -3, 1, -10, -2}), // Axes + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_2d_zero_shifts, 
RollLayerTest, testCase2DZeroShifts, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_1d, RollLayerTest, testCase1D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_2d, RollLayerTest, testCase2D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_3d, RollLayerTest, testCase3D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_4d, RollLayerTest, testCaseNegativeUnorderedAxes4D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_5d, RollLayerTest, testCaseRepeatingAxes5D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_shifts_6d, RollLayerTest, testCaseNegativeShifts6D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_unord_neg_shifts_and_axes_10d, RollLayerTest, testCaseUnordNegAxesAndShifts10D, RollLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/roll.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/roll.hpp new file mode 100644 index 00000000000..cefc43f7a75 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/roll.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/roll.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(RollLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/roll.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/roll.hpp new file mode 100644 index 00000000000..97dfcdb7fbc --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/roll.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::SizeVector, // Input shapes + InferenceEngine::Precision, // Input precision + std::vector, // Shift + std::vector, // Axes + std::string> rollParams; // Device name + +class RollLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/roll.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/roll.cpp new file mode 100644 index 00000000000..e54abc943d9 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/roll.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/roll.hpp" + +namespace LayerTestsDefinitions { + +std::string RollLayerTest::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision inputPrecision; + std::vector 
shift; + std::vector axes; + std::string targetDevice; + std::tie(inputShapes, inputPrecision, shift, axes, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "Precision=" << inputPrecision.name() << "_"; + result << "Shift=" << CommonTestUtils::vec2str(shift) << "_"; + result << "Axes=" << CommonTestUtils::vec2str(axes) << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void RollLayerTest::SetUp() { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision inputPrecision; + std::vector shift; + std::vector axes; + std::tie(inputShapes, inputPrecision, shift, axes, targetDevice) = this->GetParam(); + auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); + ngraph::ParameterVector paramVector; + auto paramData = std::make_shared(inType, ngraph::Shape(inputShapes)); + paramVector.push_back(paramData); + + auto shiftNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{shift.size()}, shift)->output(0); + auto axesNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{axes.size()}, axes)->output(0); + + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); + auto roll = std::dynamic_pointer_cast(ngraph::builder::makeRoll(paramOuts[0], shiftNode, axesNode)); + + ngraph::ResultVector results{std::make_shared(roll)}; + function = std::make_shared(results, paramVector, "roll"); +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp index 292776c307b..18e71981376 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp @@ -505,5 +505,9 @@ std::shared_ptr makeOneHot(const ngraph::Output& indices, const float& off_val, const int64_t& axis); +std::shared_ptr makeRoll(const ngraph::Output& dataNode, + const ngraph::Output& shiftNode, + const ngraph::Output& axesNode); + } // namespace builder } // namespace ngraph diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/roll.cpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/roll.cpp new file mode 100644 index 00000000000..9ebe0b6ecdf --- /dev/null +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/roll.cpp @@ -0,0 +1,17 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph_functions/builders.hpp" + +namespace ngraph { +namespace builder { + +std::shared_ptr makeRoll(const ngraph::Output &in, + const ngraph::Output &shift, + const ngraph::Output &axes) { + return std::make_shared(in, shift, axes); +} + +} // namespace builder +} // namespace ngraph diff --git a/ngraph/python/tests/test_ngraph/test_roll.py b/ngraph/python/tests/test_ngraph/test_roll.py index 07426df0816..877e22d098e 100644 --- a/ngraph/python/tests/test_ngraph/test_roll.py +++ b/ngraph/python/tests/test_ngraph/test_roll.py @@ -1,10 +1,8 @@ import ngraph as ng import numpy as np -from tests import xfail_issue_49391 from tests.runtime import get_runtime -@xfail_issue_49391 def test_roll(): runtime = get_runtime() input = np.reshape(np.arange(10), (2, 5)) From b47d11e31e5b9acbefda6a1c590c34badf64f422 Mon Sep 17 00:00:00 2001 From: 
Ilya Lavrenov Date: Mon, 3 May 2021 17:12:00 +0300 Subject: [PATCH 31/73] Removed useless iostream include (#5357) * Removed useless iostream include * Fixed samples compilation * Klockwork fixes for template plugin --- .../transformations/preprocessing/mean_image_or_value.cpp | 4 ++-- .../src/transformations/preprocessing/std_scale.cpp | 4 ++-- inference-engine/ie_bridges/c/src/ie_c_api.cpp | 1 - inference-engine/include/cpp/ie_executable_network.hpp | 1 + inference-engine/include/ie_core.hpp | 1 + inference-engine/include/ie_iexecutable_network.hpp | 1 + inference-engine/include/ie_locked_memory.hpp | 1 - inference-engine/include/inference_engine.hpp | 3 +++ inference-engine/samples/benchmark_app/utils.cpp | 1 - .../samples/common/utils/include/samples/common.hpp | 1 + inference-engine/src/cldnn_engine/cldnn_engine.cpp | 1 - inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp | 1 + inference-engine/src/gna_plugin/backend/dnn.hpp | 1 - inference-engine/src/gna_plugin/frontend/quantization.cpp | 1 - inference-engine/src/gna_plugin/gna_plugin.cpp | 1 - inference-engine/src/inference_engine/ie_system_conf.cpp | 4 ++-- .../inference_engine/os/lin/lin_shared_object_loader.cpp | 1 - .../src/inference_engine/system_allocator.hpp | 2 -- inference-engine/src/legacy_api/src/ie_layers.cpp | 8 ++++---- .../src/low_precision_transformations/src/transformer.cpp | 1 - .../src/mkldnn_plugin/nodes/proposal_onnx.cpp | 7 ------- inference-engine/src/plugin_api/debug.h | 2 +- .../src/readers/onnx_reader/onnx_model_validator.cpp | 2 +- .../src/snippets/src/pass/insert_movebroadcast.cpp | 1 - .../src/pass/load_movebroadcast_to_broadcastload.cpp | 2 -- .../src/vpu/common/include/vpu/utils/enums.hpp | 2 +- inference-engine/src/vpu/common/src/utils/io.cpp | 2 +- .../include/vpu/middleend/hw/utility.hpp | 2 +- .../src/vpu/myriad_plugin/myriad_executor.cpp | 1 - .../gpu/shared_tests_instances/skip_tests_config.cpp | 1 + .../fluid_preproc/fluid_test_computations/CMakeLists.txt | 2 ++ 31 files changed, 26 insertions(+), 37 deletions(-) diff --git a/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.cpp b/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.cpp index 7f20813e1ba..a74d8501981 100644 --- a/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.cpp +++ b/docs/template_plugin/src/transformations/preprocessing/mean_image_or_value.cpp @@ -14,7 +14,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::AddMeanSubtract, "AddMeanSubtract", 0); ngraph::pass::AddMeanSubtract::AddMeanSubtract(const MeanMap & inputInfoMap) { // RUN_ON_FUNCTION_SCOPE(AddMeanSubtract); - auto param = ngraph::pattern::wrap_type(); + auto label = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=] (pattern::Matcher& m) { auto param = std::dynamic_pointer_cast(m.get_match_root()); @@ -42,7 +42,7 @@ ngraph::pass::AddMeanSubtract::AddMeanSubtract(const MeanMap & inputInfoMap) { }; // Register pattern with Parameter operation as a pattern root node - auto m = std::make_shared(param, "AddMeanSubtract"); + auto m = std::make_shared(label, "AddMeanSubtract"); // Register Matcher register_matcher(m, callback); } diff --git a/docs/template_plugin/src/transformations/preprocessing/std_scale.cpp b/docs/template_plugin/src/transformations/preprocessing/std_scale.cpp index a27e017451b..44ad4d6080c 100644 --- a/docs/template_plugin/src/transformations/preprocessing/std_scale.cpp +++ b/docs/template_plugin/src/transformations/preprocessing/std_scale.cpp @@ -14,7 +14,7 
@@ NGRAPH_RTTI_DEFINITION(ngraph::pass::AddStdScale, "AddStdScale", 0); ngraph::pass::AddStdScale::AddStdScale(const ScaleMap& inputInfoMap) { // RUN_ON_FUNCTION_SCOPE(AddStdScale); - auto param = ngraph::pattern::wrap_type(); + auto label = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=] (pattern::Matcher& m) { auto param = std::dynamic_pointer_cast(m.get_match_root()); @@ -42,7 +42,7 @@ ngraph::pass::AddStdScale::AddStdScale(const ScaleMap& inputInfoMap) { }; // Register pattern with Parameter operation as a pattern root node - auto m = std::make_shared(param, "AddStdScale"); + auto m = std::make_shared(label, "AddStdScale"); // Register Matcher register_matcher(m, callback); } diff --git a/inference-engine/ie_bridges/c/src/ie_c_api.cpp b/inference-engine/ie_bridges/c/src/ie_c_api.cpp index d9fe8a2b372..2d7093b5a9c 100644 --- a/inference-engine/ie_bridges/c/src/ie_c_api.cpp +++ b/inference-engine/ie_bridges/c/src/ie_c_api.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/inference-engine/include/cpp/ie_executable_network.hpp b/inference-engine/include/cpp/ie_executable_network.hpp index 19136fee575..6f686699754 100644 --- a/inference-engine/include/cpp/ie_executable_network.hpp +++ b/inference-engine/include/cpp/ie_executable_network.hpp @@ -10,6 +10,7 @@ #pragma once +#include #include #include #include diff --git a/inference-engine/include/ie_core.hpp b/inference-engine/include/ie_core.hpp index 2829d3234f2..ffa1f58b7a5 100644 --- a/inference-engine/include/ie_core.hpp +++ b/inference-engine/include/ie_core.hpp @@ -9,6 +9,7 @@ */ #pragma once +#include #include #include #include diff --git a/inference-engine/include/ie_iexecutable_network.hpp b/inference-engine/include/ie_iexecutable_network.hpp index 526ce8fda81..cb5d660ee08 100644 --- a/inference-engine/include/ie_iexecutable_network.hpp +++ b/inference-engine/include/ie_iexecutable_network.hpp @@ -9,6 +9,7 @@ */ #pragma once +#include #include #include #include diff --git a/inference-engine/include/ie_locked_memory.hpp b/inference-engine/include/ie_locked_memory.hpp index 2bc272091fa..5242b171198 100644 --- a/inference-engine/include/ie_locked_memory.hpp +++ b/inference-engine/include/ie_locked_memory.hpp @@ -9,7 +9,6 @@ */ #pragma once -#include #include #include "ie_allocator.hpp" diff --git a/inference-engine/include/inference_engine.hpp b/inference-engine/include/inference_engine.hpp index 8c6c4768a53..45111176fbb 100644 --- a/inference-engine/include/inference_engine.hpp +++ b/inference-engine/include/inference_engine.hpp @@ -12,3 +12,6 @@ #include "ie_plugin_config.hpp" #include "ie_compound_blob.h" #include "ie_core.hpp" + +// remove in 2022.1 major release +#include diff --git a/inference-engine/samples/benchmark_app/utils.cpp b/inference-engine/samples/benchmark_app/utils.cpp index e41bfbd1be8..2b99c3b555c 100644 --- a/inference-engine/samples/benchmark_app/utils.cpp +++ b/inference-engine/samples/benchmark_app/utils.cpp @@ -4,7 +4,6 @@ // clang-format off #include -#include #include #include #include diff --git a/inference-engine/samples/common/utils/include/samples/common.hpp b/inference-engine/samples/common/utils/include/samples/common.hpp index 48de4961ad9..b77b38c1ed2 100644 --- a/inference-engine/samples/common/utils/include/samples/common.hpp +++ b/inference-engine/samples/common/utils/include/samples/common.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git 
a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 5ce4654dca0..b963d96a16f 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp index 6257d8da47d..76cb57425cd 100644 --- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp +++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/inference-engine/src/gna_plugin/backend/dnn.hpp b/inference-engine/src/gna_plugin/backend/dnn.hpp index 7b8feda361c..4cb0b74a54b 100644 --- a/inference-engine/src/gna_plugin/backend/dnn.hpp +++ b/inference-engine/src/gna_plugin/backend/dnn.hpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/inference-engine/src/gna_plugin/frontend/quantization.cpp b/inference-engine/src/gna_plugin/frontend/quantization.cpp index 69dcc1ccb58..f9a4b9018c8 100644 --- a/inference-engine/src/gna_plugin/frontend/quantization.cpp +++ b/inference-engine/src/gna_plugin/frontend/quantization.cpp @@ -3,7 +3,6 @@ // #include -#include #include #include #include "backend/gna_types.h" diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index a1f7e003dc7..e997e49cf91 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -5,7 +5,6 @@ #define NOMINMAX #include -#include #include #include #include diff --git a/inference-engine/src/inference_engine/ie_system_conf.cpp b/inference-engine/src/inference_engine/ie_system_conf.cpp index c4fbe597aa3..d9219597a93 100644 --- a/inference-engine/src/inference_engine/ie_system_conf.cpp +++ b/inference-engine/src/inference_engine/ie_system_conf.cpp @@ -4,10 +4,10 @@ #include #include +#include + #include "threading/ie_parallel_custom_arena.hpp" #include "ie_system_conf.h" -#include -#include # define XBYAK_NO_OP_NAMES # define XBYAK_UNDEF_JNL diff --git a/inference-engine/src/inference_engine/os/lin/lin_shared_object_loader.cpp b/inference-engine/src/inference_engine/os/lin/lin_shared_object_loader.cpp index 8143e0634e1..297929aa4ae 100644 --- a/inference-engine/src/inference_engine/os/lin/lin_shared_object_loader.cpp +++ b/inference-engine/src/inference_engine/os/lin/lin_shared_object_loader.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // - #include #include diff --git a/inference-engine/src/inference_engine/system_allocator.hpp b/inference-engine/src/inference_engine/system_allocator.hpp index 66c56511a67..ce609fff6ec 100644 --- a/inference-engine/src/inference_engine/system_allocator.hpp +++ b/inference-engine/src/inference_engine/system_allocator.hpp @@ -4,8 +4,6 @@ #pragma once -#include - #include "ie_allocator.hpp" namespace InferenceEngine { diff --git a/inference-engine/src/legacy_api/src/ie_layers.cpp b/inference-engine/src/legacy_api/src/ie_layers.cpp index 1649aaea12b..3f640711f1f 100644 --- a/inference-engine/src/legacy_api/src/ie_layers.cpp +++ b/inference-engine/src/legacy_api/src/ie_layers.cpp @@ -180,7 +180,7 @@ unsigned int CNNLayer::GetParamAsUInt(const char* param, unsigned int def) const std::string message = "Cannot parse parameter " + std::string(param) + " from IR for layer " + 
name + ". Value " + val + " cannot be casted to unsigned int."; try { - long value = std::stol(val); + long long value = std::stoll(val); if ((value < 0) || (value > std::numeric_limits::max())) { IE_THROW() << message; } @@ -195,7 +195,7 @@ unsigned int CNNLayer::GetParamAsUInt(const char* param) const { std::string message = "Cannot parse parameter " + std::string(param) + " from IR for layer " + name + ". Value " + val + " cannot be casted to unsigned int."; try { - long value = std::stol(val); + long long value = std::stoll(val); if ((value < 0) || (value > std::numeric_limits::max())) { IE_THROW() << message; } @@ -215,7 +215,7 @@ std::vector CNNLayer::GetParamAsUInts(const char* param, std::vect if (vals.empty()) return def; while (getline(stream, str, ',')) { try { - long value = std::stol(str); + long long value = std::stoll(str); if ((value < 0) || (value > std::numeric_limits::max())) { IE_THROW() << message; } @@ -236,7 +236,7 @@ std::vector CNNLayer::GetParamAsUInts(const char* param) const { name + ". Value " + vals + " cannot be casted to unsigned int."; while (getline(stream, str, ',')) { try { - long value = std::stol(str); + long long value = std::stoll(str); if ((value < 0) || (value > std::numeric_limits::max())) { IE_THROW() << message; } diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp index eee96c562e7..d8b484bcbce 100644 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp index d10f495b723..878797e6d17 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp @@ -304,13 +304,6 @@ public: } } - void print_shape(const Blob::Ptr& b) { - for (size_t i = 0; i < b->getTensorDesc().getDims().size(); ++i) { - std::cout << b->getTensorDesc().getDims()[i] << ", "; - } - std::cout << std::endl; - } - StatusCode execute(std::vector &inputs, std::vector &outputs, ResponseDesc *resp) noexcept override { try { diff --git a/inference-engine/src/plugin_api/debug.h b/inference-engine/src/plugin_api/debug.h index 1848e5f3905..838c5b02941 100644 --- a/inference-engine/src/plugin_api/debug.h +++ b/inference-engine/src/plugin_api/debug.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/inference-engine/src/readers/onnx_reader/onnx_model_validator.cpp b/inference-engine/src/readers/onnx_reader/onnx_model_validator.cpp index 26e1b020e94..ae472990c6a 100644 --- a/inference-engine/src/readers/onnx_reader/onnx_model_validator.cpp +++ b/inference-engine/src/readers/onnx_reader/onnx_model_validator.cpp @@ -9,7 +9,7 @@ #include #include #include -#include + namespace detail { namespace onnx { enum Field { diff --git a/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp b/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp index 2186335dc4d..9da8f240d69 100644 --- a/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp +++ b/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp @@ -11,7 +11,6 @@ #include #include -#include #include using namespace ngraph; diff --git 
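
One behavioral detail worth calling out in the ie_layers.cpp hunks above: the upper-bound comparison against std::numeric_limits<unsigned int>::max() is only meaningful when the parsed type is wider than unsigned int. long is 32 bits on LLP64 platforms such as Windows, so std::stol either throws std::out_of_range before the check runs or makes the comparison vacuous, while long long is guaranteed to be at least 64 bits. A minimal sketch of the parse-then-range-check pattern follows; parse_uint_param is an invented stand-in, not the actual CNNLayer helper.

    #include <iostream>
    #include <limits>
    #include <stdexcept>
    #include <string>

    // Illustrative stand-in for the GetParamAsUInt-style helpers; not the real API.
    unsigned int parse_uint_param(const std::string& val) {
        // long long is at least 64 bits everywhere, so the value survives parsing
        // and the explicit range check below does the rejecting.
        const long long value = std::stoll(val);
        if (value < 0 || value > static_cast<long long>(std::numeric_limits<unsigned int>::max()))
            throw std::out_of_range("value does not fit into unsigned int: " + val);
        return static_cast<unsigned int>(value);
    }

    int main() {
        std::cout << parse_uint_param("3000000000") << '\n';  // fits unsigned int, overflows a 32-bit long
        try {
            parse_uint_param("5000000000");                   // rejected by the explicit range check
        } catch (const std::out_of_range& e) {
            std::cout << e.what() << '\n';
        }
        return 0;
    }
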
a/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp b/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp index 36fccd8bb7d..8fa9a604ddc 100644 --- a/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp +++ b/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp @@ -12,8 +12,6 @@ #include #include -#include - ngraph::snippets::pass::LoadMoveBroadcastToBroadcastLoad::LoadMoveBroadcastToBroadcastLoad() { MATCHER_SCOPE(LoadMoveBroadcastToBroadcastLoad); auto param_pattern = ngraph::pattern::wrap_type(); diff --git a/inference-engine/src/vpu/common/include/vpu/utils/enums.hpp b/inference-engine/src/vpu/common/include/vpu/utils/enums.hpp index 6eddf988c7f..6e089dcd21d 100644 --- a/inference-engine/src/vpu/common/include/vpu/utils/enums.hpp +++ b/inference-engine/src/vpu/common/include/vpu/utils/enums.hpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/inference-engine/src/vpu/common/src/utils/io.cpp b/inference-engine/src/vpu/common/src/utils/io.cpp index dcaa8043d1a..e7324fb1bf3 100644 --- a/inference-engine/src/vpu/common/src/utils/io.cpp +++ b/inference-engine/src/vpu/common/src/utils/io.cpp @@ -4,7 +4,7 @@ #include -#include +#include namespace vpu { diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/hw/utility.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/hw/utility.hpp index ec7cd057f81..db89d14bdaa 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/hw/utility.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/hw/utility.hpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp index ab56c310dc0..d92a948ce57 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include #include diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 631c6a88c4d..a9e6a6aeac0 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -59,6 +59,7 @@ std::vector disabledTestPatterns() { R"(.*ConstantResultSubgraphTest.*inPrc=I16.*)", // TODO: Issue: 54436 R"(.*LSTMSequence.*CompareWithRefs.*mode=PURE_SEQ_RAND_SEQ_LEN_PARAM.*direction=bidirectional_clip=0.7_netPRC=FP32.*)", + R"(.*LSTMSequence.*CompareWithRefs.*mode=CONVERT_TO_TI_RAND_SEQ_LEN_PARAM_seq.*direction=bidirectional_clip=0.7_netPRC=FP32.*)", // TODO: Issue: 54194 R"(.*ActivationLayerTest.*SoftPlus.*)", }; diff --git a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt index 3ccecefe8fb..69e9c5c1402 100644 --- a/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt +++ b/inference-engine/tests_deprecated/fluid_preproc/fluid_test_computations/CMakeLists.txt @@ -18,3 +18,5 @@ target_include_directories(fluid_test_computations PUBLIC "${CMAKE_CURRENT_SOURC 
target_link_libraries(fluid_test_computations PRIVATE inference_engine_preproc_s inference_engine fluid) target_compile_definitions(fluid_test_computations PRIVATE IMPLEMENT_FLUID_COMPUTATION_API) + +add_dependencies(fluid_test_computations inference_engine_preproc) From 29a8be523df112b49b2f793cbed552408867c4c0 Mon Sep 17 00:00:00 2001 From: Paul Youngsoo Ahn Date: Tue, 4 May 2021 15:57:06 +0900 Subject: [PATCH 32/73] [IE CLDNN] Extended eltwise fusing (#5181) * [cldnn] Add initial fused conv eltw POC - Add cldnn unit test - Add fused dependency list to the fused_primitive_desc - fuse_nodes update for saving fusing history and depenecies - Modify Jitter to create jit constants using fused dependencies - Add cldnn unit-test cases for multiple serial and parallel eltwise fuse pattern - Modify Jitter and add default values in sum input Signed-off-by: Ahn, Paul Y Co-authored-by: Andrew Kwangwoong Park * [cldnn] Update fused_conv_eltwise cldnn unit test - Add execute and compare function - Add cldnn unit-test case for multiple parallel eltwise and additional eltwise - Add cldnn unit-test case for combination of multiple parallel eltw - Add cldnn unit-test cases for serial and diverged quantize and eltwise Signed-off-by: Andrew Kwangwoong Park * [cldnn] Modify checking fusibility of eltwise fusing - Add new checking fusibility rule in prepare_primitive_fusing - Move cldnn eltwise fusing test to fusing_gpu_test.cpp - Modify method to get input var name in jitter Signed-off-by: Ahn, Paul Y * [cldnn] Fix fusing item type and activation fusibility checking condition - Extract input_data_supports_fusings from fuse_activaion_f - Fix checking supported mode bug Co-authored-by: Andrew Kwangwoong Park --- .../kernel_selector/core/common/jitter.cpp | 66 ++++-- .../kernel_selector/core/common/jitter.h | 4 +- .../kernel_selector/core/kernel_base.cpp | 9 +- .../core/kernel_selector_params.h | 1 + .../prepare_primitive_fusing.cpp | 171 +++++++++++++- .../src/include/kernel_selector_helper.h | 8 + .../clDNN/src/include/program_impl.h | 2 +- .../clDNN/src/include/program_node.h | 1 + .../thirdparty/clDNN/src/program.cpp | 13 +- .../tests/test_cases/fusings_gpu_test.cpp | 208 ++++++++++++++++++ 10 files changed, 443 insertions(+), 40 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp index f3a234f1f2e..2db52487a85 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp @@ -1476,7 +1476,7 @@ JitConstants FusedOpsCodeGenerator::MakeLoadJitConstants(const FusedOpsConfigura JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfiguration& conf, const std::string in_var, const Datatype in_type, - std::string& out_var, Datatype& out_type) const { + std::string& out_var) const { JitConstants jit = {}; std::string op_decls = ""; @@ -1484,9 +1484,11 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati auto idx = conf.bfzyx_idx_order; std::string shuffle_var = conf.shuffle_var_name; bool is_shuffled = false; + auto& fused_op_ids = desc.fused_op_ids; + std::vector input_vars; - out_var = GetOutputVarName(in_var); - out_type = desc.output_tensor.GetDType(); + out_var = GetOutputVarName(in_var, desc.op_id); + const auto& out_type = desc.output_tensor.GetDType(); if (conf.load_type == FusedOpsConfiguration::LoadType::FEATURE_SHUFFLE && 
(desc.GetType() == KernelType::SCALE || desc.GetType() == KernelType::QUANTIZE)) { @@ -1503,15 +1505,19 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati } auto get_acc_t = [&]() -> Datatype { - std::vector tensor_types = {desc.output_tensor.GetDType()}; + std::vector input_types = {desc.output_tensor.GetDType()}; for (auto& in : desc.tensors) { - tensor_types.push_back(in.GetDType()); + input_types.push_back(in.GetDType()); + } + + for (auto& in : fused_op_ids) { + input_types.push_back(in.second); } std::vector types_prioritized = { Datatype::F32, Datatype::F16 }; for (auto& type : types_prioritized) { - if (std::any_of(tensor_types.begin(), tensor_types.end(), [=](const Datatype& t) -> bool { return t == type; })) { + if (std::any_of(input_types.begin(), input_types.end(), [=](const Datatype& t) -> bool { return t == type; })) { return type; } } @@ -1520,25 +1526,42 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati }; auto get_input = [&](size_t index) -> std::string { - auto in_name = index == 0 ? in_var : GetInputVarName(index - 1, is_shuffled, shuffle_var); - auto tensor_type = index == 0 ? in_type : desc.tensors[index - 1].GetDType(); + auto input_name = in_var; + auto input_type = in_type; + if (index > 0) { + size_t input_idx = index - 1; + size_t tensors_len = desc.tensors.size(); + input_name = (input_idx < tensors_len)? GetInputVarName(input_idx, is_shuffled, shuffle_var) + : GetOutputVarName(in_var, fused_op_ids[input_idx - tensors_len].first); + input_type = (input_idx < tensors_len)? desc.tensors[input_idx].GetDType() : fused_op_ids[input_idx - tensors_len].second; + } auto acc_t = get_acc_t(); - if (tensor_type != acc_t) - return ConvertToType(in_name, acc_t, vec_size); + if (input_type != acc_t) + return ConvertToType(input_name, acc_t, vec_size); else - return in_name; + return input_name; }; + // Generate input variable list: dst + tensor inputs + fused ops input + // If the input_vars_length are larger than max_num_input_vars, do not add dst to input variable list. + // because dst is not used, when Fused op has both tensor and fused input. + size_t input_vars_length = 1 + desc.tensors.size() + fused_op_ids.size(); // dst + tensor inputs + fused ops input + size_t max_num_input_vars = (desc.tensors.size() > 1)? 3 : 2; + size_t start_idx = (input_vars_length > max_num_input_vars) ? 
1 : 0; + for (size_t i = start_idx; i < input_vars_length; i++) { + input_vars.push_back(get_input(i)); + } + switch (desc.GetType()) { case KernelType::SCALE: { auto tmp_var = out_var + "_tmp"; if (desc.tensors.size() > 1) { op_decls += "\\\n\t" + GetType(get_acc_t(), vec_size) + " " + tmp_var + " = " - + get_input(0) + " * " + get_input(1) + " + " + get_input(2) + ";"; + + input_vars[0] + " * " + input_vars[1] + " + " + input_vars[2] + ";"; } else { op_decls += "\\\n\t" + GetType(get_acc_t(), vec_size) + " " + tmp_var + " = " - + get_input(0) + " * " + get_input(1) + ";"; + + input_vars[0] + " * " + input_vars[1] + ";"; } op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(tmp_var, vec_size) + ";"; break; @@ -1561,7 +1584,7 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati } auto tmp_var = out_var + "_tmp"; - op_decls += "\\\n\t" + GetType(get_acc_t(), vec_size) + " " + tmp_var + " = " + get_input(0) + op + get_input(1) + ";"; + op_decls += "\\\n\t" + GetType(get_acc_t(), vec_size) + " " + tmp_var + " = " + input_vars[0] + op + input_vars[1] + ";"; op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(tmp_var, vec_size) + ";"; break; } @@ -1570,13 +1593,14 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati if (!p) throw std::runtime_error("[clDNN] Quantize fuse params can't be nullptr"); - std::string in_converted = in_var; + std::string in_converted = (fused_op_ids.empty()) ? in_var : GetOutputVarName(in_var, fused_op_ids[0].first); + Datatype input_type = (fused_op_ids.empty()) ? in_type : fused_op_ids[0].second; Datatype tmp_type = Datatype::F32; std::string tmp_type_str = GetType(tmp_type, vec_size); std::string tmp_var = out_var + "_tmp"; - if (in_type != tmp_type) { - in_converted = ConvertToType(in_var, tmp_type, vec_size); + if (input_type != tmp_type) { + in_converted = ConvertToType(in_converted, tmp_type, vec_size); } auto post_scale = p->per_tensor_output_scale ? Broadcast(std::to_string(p->out_scale), tmp_type, vec_size) @@ -1618,7 +1642,9 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati case KernelType::ACTIVATION: { auto p = desc.GetOpParams(); base_activation_params activation_p = p->param; - op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(in_var, vec_size) + ";"; + + std::string new_in_var = (fused_op_ids.empty()) ? 
in_var : GetOutputVarName(in_var, fused_op_ids[0].first); + op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(new_in_var, vec_size) + ";"; if (activation_p.function != ActivationFunction::NONE) { auto suffix = "_FUSED_OP"+std::to_string(desc.op_id) + conf.suffix; std::string nl_m = std::to_string(activation_p.m); @@ -1784,12 +1810,12 @@ std::string FusedOpsCodeGenerator::GetInputVarName(size_t input_id, bool is_shuf return GetTypeStr() + std::to_string(desc.op_id) + "_data" + std::to_string(input_id); } -std::string FusedOpsCodeGenerator::GetOutputVarName(std::string input_var) const { +std::string FusedOpsCodeGenerator::GetOutputVarName(std::string input_var, size_t op_id) const { std::replace(input_var.begin(), input_var.end(), '[', '_'); std::replace(input_var.begin(), input_var.end(), ']', '_'); std::replace(input_var.begin(), input_var.end(), ' ', '_'); std::replace(input_var.begin(), input_var.end(), '.', '_'); - return input_var + "_out"; + return input_var + "_out_" + std::to_string(op_id); } std::string FusedOpsCodeGenerator::GetType(Datatype dt, size_t vec_size) const { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h index 3ff35250826..f477c338e1c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h @@ -340,7 +340,7 @@ public: JitConstants MakeLoadJitConstants(const FusedOpsConfiguration& conf, const DataTensor prim_output) const; JitConstants MakeOpJitConstants(const FusedOpsConfiguration& conf, const std::string in_var, const Datatype in_type, - std::string& out_var, Datatype& out_type) const; + std::string& out_var) const; bool CanPreloadData(const FusedOpsConfiguration& conf) const; @@ -353,7 +353,7 @@ public: std::string GetIdx(size_t input_id, idx_desc idx, bool should_be_safe) const; std::string GetInputPtrName(size_t input_id) const; std::string GetInputVarName(size_t input_id, bool is_shuffled = false, std::string shuffle_var = "") const; - std::string GetOutputVarName(std::string input_var_name) const; + std::string GetOutputVarName(std::string input_var_name, size_t op_id) const; std::string ConvertToOutputType(std::string var, size_t vec_size = 1) const; std::string ConvertToType(std::string var, Datatype dt, size_t vec_size = 1) const; std::string CastToType(std::string var, Datatype dt, size_t vec_size = 1) const; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp index 01df9ab53e8..2feda668c1f 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp @@ -115,17 +115,14 @@ JitConstants KernelBase::MakeFusedOpsJitConstants(const kernel_selector::base_pa std::string fused_ops_preload; std::string fused_ops_calc; std::string in_name = c.input_var_name; + std::string out_name = ""; Datatype in_type = c.input_dt; bool can_all_use_preload = true; for (size_t i = 0; i < params.fused_ops.size(); i++) { auto fused_dep_codegen = FusedOpsCodeGenerator(params.fused_ops[i]); - std::string out_var; - Datatype out_type; jit.Merge(fused_dep_codegen.MakeLoadJitConstants(c, params.output)); - jit.Merge(fused_dep_codegen.MakeOpJitConstants(c, in_name, in_type, out_var, out_type)); - in_name = out_var; - in_type = out_type; + 
jit.Merge(fused_dep_codegen.MakeOpJitConstants(c, in_name, in_type, out_name)); bool can_use_preload = fused_dep_codegen.CanPreloadData(c); can_all_use_preload &= can_use_preload; @@ -145,7 +142,7 @@ JitConstants KernelBase::MakeFusedOpsJitConstants(const kernel_selector::base_pa jit.AddConstant(MakeJitConstant("FUSED_OPS" + c.suffix, fused_ops)); jit.AddConstant(MakeJitConstant("FUSED_OPS_PRELOAD" + c.suffix, fused_ops_preload)); jit.AddConstant(MakeJitConstant("FUSED_OPS_CALC" + c.suffix, fused_ops_calc)); - jit.AddConstant(MakeJitConstant("FUSED_OPS_RESULT" + c.suffix, in_name)); + jit.AddConstant(MakeJitConstant("FUSED_OPS_RESULT" + c.suffix, out_name)); bool can_any_use_preload = !fused_ops_preload.empty(); jit.AddConstant(MakeJitConstant("FUSED_OPS_CAN_USE_PRELOAD" + c.suffix, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h index 04b5ce64eb7..ce5d9062c46 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h @@ -558,6 +558,7 @@ struct fused_operation_desc { MultiDataTensor tensors; DataTensor output_tensor; size_t op_id; + std::vector> fused_op_ids; // Helper functions for operation generation KernelType GetType() const { return op_params->GetType(); } diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp index 0a192f75c14..21011545282 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp @@ -45,10 +45,12 @@ #include "extract_image_patches_inst.h" #include "reduce_inst.h" #include +#include #include #include #include #include +#include #include "error_handler.h" void prepare_primitive_fusing::run(program_impl& p) { @@ -164,12 +166,13 @@ void prepare_primitive_fusing::fuse_reorders(program_impl &p) { void prepare_primitive_fusing::fuse_activations(program_impl &p) { bool is_debug = p.get_options().get()->enabled(); + std::map> fusing_history; auto itr = p.get_processing_order().begin(); while (itr != p.get_processing_order().end()) { auto node_itr = itr++; auto& node = (*node_itr); - program_helpers::do_for_types(*node, [&p, &is_debug](activation_node& node) { + program_helpers::do_for_types(*node, [&p, &is_debug, &fusing_history](activation_node& node) { auto& input = node.input(); auto id = node.id(); // Restrictions: @@ -226,7 +229,7 @@ void prepare_primitive_fusing::fuse_activations(program_impl &p) { } else { // If node already has any fused node using new mechanism, // we can just use the same way and handle any amount of activations - p.fuse_nodes(input, node); + p.fuse_nodes(input, node, &fusing_history); } p.add_optimized_primitive_info(id, {input.id()}); @@ -350,6 +353,7 @@ void prepare_primitive_fusing::fuse_bias(program_impl &p) { void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { bool recalc_processing_order = false; + std::map> fusing_history; auto itr = p.get_processing_order().begin(); while (itr != p.get_processing_order().end()) { @@ -497,9 +501,63 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { return true; }; + auto get_users_from_fusing_history = [&](primitive_id id) { + std::vector users; + for (auto deps_data : 
fusing_history) { + auto key = deps_data.first; + auto deps_vec = deps_data.second; + auto iter = std::find(deps_vec.begin(), deps_vec.end(), id); + if (iter != deps_vec.end()) { + users.push_back(key); + } + } + return users; + }; + + auto input_data_supports_fusings = [&](cldnn::program_node& input_data, primitive_id current_node_id) -> bool { + if (input_data.get_users().size() != 1) { + // If input_data has fused primitives, + // find original dependency of current_node using fusing_history + // and check the number of users of it. + // If the node has multiple users it's not fusible. + if (input_data.has_fused_primitives()) { + size_t num_original_dependencies = 0; + auto iter = fusing_history.find(current_node_id); + if (iter != fusing_history.end()) { + // Find current_node's original dependency list + for (auto& prim_id : iter->second) { + // find input_data's fused_prims in the prim_deps_ids + auto& fused_descs = input_data.get_fused_primitives(); + auto origin_input_iter = std::find_if(fused_descs.begin(), fused_descs.end(), + [&](cldnn::fused_primitive_desc& desc) { + return (desc.node->id() == prim_id); + }); + if (origin_input_iter != fused_descs.end()) { + auto users = get_users_from_fusing_history(origin_input_iter->node->id()); + if (users.size() != 1) { + return false; + } + num_original_dependencies++; + } + } + } + // If num_original_dependencies is zero, input_data is original parent + if (num_original_dependencies == 0) { + return false; + } + } else { + return false; + } + } + return true; + }; + auto fuse_activation_f = [&](activation_node& activation_node) { auto& input_data = activation_node.get_dependency(0); - if (input_data.get_users().size() != 1 || activation_node.get_dependencies().size() >= 3) + if (activation_node.get_dependencies().size() >= 3) + return; + + if (!input_data_supports_fusings(input_data, activation_node.id())) return; bool should_fuse = input_data.is_type(); @@ -558,7 +616,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { if (!should_fuse) return; - p.fuse_nodes(input_data, activation_node); + p.fuse_nodes(input_data, activation_node, &fusing_history); }; auto fuse_scale_f = [&](scale_node& scale_node) { @@ -623,7 +681,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { if (!should_fuse) return; - p.fuse_nodes(input_data, scale_node); + p.fuse_nodes(input_data, scale_node, &fusing_history); }; auto fuse_quantize_f = [&](quantize_node& quantize_node) { @@ -717,7 +775,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { if (!should_fuse) return; - p.fuse_nodes(input_data, quantize_node); + p.fuse_nodes(input_data, quantize_node, &fusing_history); }; auto fuse_eltwise_f = [&](eltwise_node& node) { @@ -811,8 +869,83 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { if (parent2->is_type() && !conv_supports_fusings(parent2->as())) return; - // This fusing can be extended to support peer node in any layout - bool merge_allowed = fused_node->get_users().size() == 1; + bool merge_allowed = true; + // If fused node is not convolution and fused node has multiple users, + // follow the legacy checking rule + if (fused_node->is_type() && fused_node->get_users().size() > 1) { + // Allowed new pattern: Elt1, Act, Elt2, Elt3, Elt4 are fused to Conv1 + // * Conv1 -> Eltw1(Add) -> Act(Clamp) -> Eltw2(Mul) -> Eltw3(Mul) -> Eltw4(Add) -> Conv2 + // * \–----------------------------------->/ \---------> Eltw5(Div) + // + // Extended eltwise fusiblity checking 
rules + // + // 1. All fusing nodes should be eltwise or activation node + // 2. All intermediate fusing nodes except last fusing node(i.e. Elt4) should have only eltwise or activation node as user. + // 3. Currently eltwise and activations are allowed to be fused from multiple branches, + // but technically other fusable operations can be allowed too in the future. + // 4. When node_queue has only one node, the while loop is ended and this node is fused to fused node(Conv1) + // node_queue having one node means all user nodes from fused node(Conv1) converge at that node. + // 5. if node_queue has multiple nodes even if the level of current_node is max_levels, it cannot be fused. + std::deque> node_queue; //std::pair + std::vector node_history; + node_queue.push_back(std::make_pair(fused_node, 0)); + + const uint8_t max_levels = 5; + do { + // Pop the current node from node_queue + // Add the current node to the node_history to verfiy the trace of checking + auto current_node = node_queue.front(); + node_queue.pop_front(); + if (std::find(node_history.begin(), node_history.end(), current_node.first) == node_history.end()) { + node_history.push_back(current_node.first); + } + + if (current_node.second > max_levels) { + return; + } + + // Push node to node_queue + // If the node is already existed in node_queue, do not add it to the node_queue. + auto push_node_queue = [&](cldnn::program_node* in_node, size_t level) { + auto iter = std::find_if(node_queue.begin(), node_queue.end(), [&](std::pair element) { + return (in_node->id() == element.first->id()); + }); + if (iter == node_queue.end()) { + node_queue.push_back(std::make_pair(in_node, level)); + } + }; + + // If the any user node is not eltwise(mul / add mode) and activation, + // the current node will be considered as last node and put it back into the node_queue + auto curr_users = current_node.first->get_users(); + auto invalid_user_iter = std::find_if(curr_users.begin(), curr_users.end(), [&](cldnn::program_node* user) { + return (user->is_output() || + (!(user->is_type() && user->get_primitive()->input.size() == 2 && + (std::find(supported_modes.begin(), supported_modes.end(), + (user->as()).get_primitive()->mode) != supported_modes.end())) && + !(user->is_type() && user->get_primitive()->input.size() == 1))); + }); + + if (invalid_user_iter != curr_users.end()) { + // If fused_node(i.e. Conv1) have invalid user node(that is not activation and eltwise ndoe), it cannot be fused + if (fused_node->id() == current_node.first->id()) { + return; + } + push_node_queue(current_node.first, (current_node.second+1)); + continue; + } + + // Add user node in current node to the queue + // But, do not add the node that passed once, it is checked using node_history + for (auto& user : curr_users) { + auto iter = std::find(node_history.begin(), node_history.end(), user); + if (iter == node_history.end()) + push_node_queue(user, current_node.second+1); + } + } while (node_queue.size() > 1); + } else { + merge_allowed = fused_node->get_users().size() == 1; + } for (auto& parent : fused_node->get_dependencies()) if (parent->id() == peer_node->id()) @@ -831,7 +964,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { recalc_processing_order = true; } - p.fuse_nodes(*fused_node, node); + p.fuse_nodes(*fused_node, node, &fusing_history); }; program_helpers::do_for_types(*node, @@ -861,6 +994,22 @@ void prepare_primitive_fusing::optimize_fused_ops(program_impl& p) { // 2. 
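[Editor's note] The comments above describe the extended eltwise fusibility rule informally: starting from the node everything is being fused into (Conv1), walk its users breadth-first, allow only eltwise/activation nodes on the way, and accept the pattern only if all branches converge on a single node within a bounded number of levels. The sketch below restates that traversal over a toy graph so the rule can be read in isolation. It is a minimal illustration with hypothetical types (`Node`, `Kind`, `users_converge`), not the clDNN `program_node` API, and it mirrors the queue/history logic of the patch rather than reproducing it.

```cpp
// Illustrative sketch only: convergence check for a chain of eltwise/activation users.
#include <algorithm>
#include <deque>
#include <string>
#include <utility>
#include <vector>

enum class Kind { Convolution, Eltwise, Activation, Other };

struct Node {
    std::string id;
    Kind kind;
    std::vector<Node*> users;   // consumers of this node's output
    bool is_output = false;
};

// True if every path of eltwise/activation users starting from `fused`
// converges on a single node within `max_levels` hops.
bool users_converge(Node* fused, int max_levels = 5) {
    std::deque<std::pair<Node*, int>> queue{{fused, 0}};
    std::vector<Node*> visited;

    auto fusable = [](const Node* n) {
        return n->kind == Kind::Eltwise || n->kind == Kind::Activation;
    };
    auto push_unique = [&](Node* n, int level) {
        auto same = [&](const std::pair<Node*, int>& e) { return e.first == n; };
        if (std::find_if(queue.begin(), queue.end(), same) == queue.end())
            queue.emplace_back(n, level);
    };

    do {
        auto current = queue.front();
        queue.pop_front();
        if (std::find(visited.begin(), visited.end(), current.first) == visited.end())
            visited.push_back(current.first);
        if (current.second > max_levels)
            return false;                       // branches did not converge in time

        // Any non-fusable user makes `current` a terminal node of the pattern.
        bool has_invalid_user = std::any_of(
            current.first->users.begin(), current.first->users.end(),
            [&](Node* u) { return u->is_output || !fusable(u); });
        if (has_invalid_user) {
            if (current.first == fused)
                return false;                   // the conv itself feeds a non-fusable op
            push_unique(current.first, current.second + 1);
            continue;
        }
        for (Node* u : current.first->users)    // expand only nodes not seen before
            if (std::find(visited.begin(), visited.end(), u) == visited.end())
                push_unique(u, current.second + 1);
    } while (queue.size() > 1);                 // one remaining node = convergence point
    return true;
}
```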
fuse conv bias to quantize shift auto& fused_prims = node->get_fused_primitives(); + auto remove_deps_of_node = [&](cldnn::fused_primitive_desc& desc) { + for (auto& prim : fused_prims) { + if (desc.node->id() == prim.node->id()) { + continue; + } + + auto rm_iter = std::find_if(prim.fused_deps.begin(), prim.fused_deps.end(), [&](primitive_id& dep_id){ + return (desc.node->id() == dep_id); + }); + if (rm_iter != prim.fused_deps.end()) { + prim.fused_deps.erase(rm_iter); + prim.fused_deps.insert(prim.fused_deps.end(), desc.fused_deps.begin(), desc.fused_deps.end()); + } + } + }; + // Drop relu if the next fused op is quantize with u8 output and no in_shift auto fp_itr = fused_prims.begin(); while (fp_itr != fused_prims.end()) { @@ -883,6 +1032,7 @@ void prepare_primitive_fusing::optimize_fused_ops(program_impl& p) { !quantize_node.get_need_pre_shift(); if (can_skip) { + remove_deps_of_node(fp); fp_itr = fused_prims.erase(curr_itr); } } @@ -891,6 +1041,7 @@ void prepare_primitive_fusing::optimize_fused_ops(program_impl& p) { } void prepare_conv_eltw_fusing::fuse_conv_depth_to_space(program_impl& p, program_node* node) { + std::map> fusing_history; // make sure this convolution have only 1 user and it's depth_to_space // make sure convolution is not an output if (node->get_users().size() != 1 || node->is_output()) @@ -919,7 +1070,7 @@ void prepare_conv_eltw_fusing::fuse_conv_depth_to_space(program_impl& p, program return; } - p.fuse_nodes(*conv_node, *d_t_s_node); + p.fuse_nodes(*conv_node, *d_t_s_node, &fusing_history); } void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node* node) { diff --git a/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h b/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h index 5e0c510c136..46926eecbca 100644 --- a/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h +++ b/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h @@ -159,6 +159,7 @@ inline params_t get_default_params(const arg_t& arg, uint32_t split = 1) { params.layerID = arg.id(); convert_fused_activation_func_params(arg, params.activations); + std::map> prim_op_id_map; size_t op_id = 0; for (auto& fused_prim : arg.get_fused_primitives()) { kernel_selector::fused_operation_desc desc; @@ -171,6 +172,13 @@ inline params_t get_default_params(const arg_t& arg, uint32_t split = 1) { desc.dep_size = fused_prim.deps.size(); desc.op_id = op_id++; desc.output_tensor = convert_data_tensor(fused_prim.output_layout); + prim_op_id_map[fused_prim.node->id()] = std::make_pair(desc.op_id, desc.output_tensor.GetDType()); + for (auto& dep : fused_prim.fused_deps) { + auto iter = prim_op_id_map.find(dep); + if (iter != prim_op_id_map.end()) { + desc.fused_op_ids.push_back(iter->second); + } + } for (size_t i = desc.dep_idx_start; i < desc.dep_idx_start + desc.dep_size; i++) { desc.tensors.push_back(convert_data_tensor(arg.get_dependency(i).get_output_layout())); diff --git a/inference-engine/thirdparty/clDNN/src/include/program_impl.h b/inference-engine/thirdparty/clDNN/src/include/program_impl.h index 2aa361b7b12..cafc2a8b44c 100644 --- a/inference-engine/thirdparty/clDNN/src/include/program_impl.h +++ b/inference-engine/thirdparty/clDNN/src/include/program_impl.h @@ -186,7 +186,7 @@ public: bool extract_and_remove(program_node& node); // Fuses two nodes into fused_node and removes peer_node from graph - void fuse_nodes(program_node& fused_node, program_node& peer_node); + void fuse_nodes(program_node& fused_node, 
program_node& peer_node, std::map>* fusing_history); // returns if 'node' has been removed bool remove_if_dangling(program_node& node); diff --git a/inference-engine/thirdparty/clDNN/src/include/program_node.h b/inference-engine/thirdparty/clDNN/src/include/program_node.h index 0cd3225f98b..3e8711d8625 100644 --- a/inference-engine/thirdparty/clDNN/src/include/program_node.h +++ b/inference-engine/thirdparty/clDNN/src/include/program_node.h @@ -40,6 +40,7 @@ struct fused_primitive_desc { std::shared_ptr node; size_t dep_start_idx; std::vector deps; + std::vector fused_deps; activation_func activation; activation_additional_params activation_params; layout output_layout = layout(data_types::f32, format::bfyx, tensor()); diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp index e8900de4555..76b109bf75c 100644 --- a/inference-engine/thirdparty/clDNN/src/program.cpp +++ b/inference-engine/thirdparty/clDNN/src/program.cpp @@ -895,7 +895,7 @@ bool program_impl::extract_and_remove(program_node& node) { return true; } -void program_impl::fuse_nodes(program_node &fused_node, program_node &peer_node) { +void program_impl::fuse_nodes(program_node &fused_node, program_node &peer_node, std::map>* fusing_history) { auto peer_layout = peer_node.get_output_layout(); fused_primitive_desc local_desc; local_desc.node = get_node_ptr(peer_node.id()); @@ -913,6 +913,13 @@ void program_impl::fuse_nodes(program_node &fused_node, program_node &peer_node) cldnn::padding needed_padding = padding::max(peer_layout.data_padding, fused_node.get_output_layout().data_padding); + auto history_iter = fusing_history->find(peer_node.id()); + if (history_iter != fusing_history->end()) { + for (auto& id : history_iter->second) { + local_desc.fused_deps.push_back(id); + } + } + // Add new dependencies to the fused_node for (size_t i = 0; i < peer_node.get_dependencies().size(); i++) { auto& dep = peer_node.get_dependency(i); @@ -952,6 +959,10 @@ void program_impl::fuse_nodes(program_node &fused_node, program_node &peer_node) } add_optimized_primitive_info(peer_node.id(), { fused_node.id() }); + for (auto& user : peer_node.users) { + (*fusing_history)[user->id()].push_back(peer_node.id()); + } + // Remove all edges connected with peer node while (peer_node.get_dependencies().size() > 0) { auto& dep = peer_node.get_dependency(peer_node.get_dependencies().size() - 1); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 33975234b03..a14d6d7975a 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -323,6 +323,8 @@ public: return layout{ p.data_type, p.default_format, tensor{1, p.in_shape.feature[0], 1, 1} }; } + + layout get_single_element_layout(T& p) { return layout{ p.default_type, p.default_format, tensor{1, 1, 1, 1} }; } @@ -831,6 +833,212 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_prelu_eltwise, bc_test_params{CASE_CONV_FP16_4, 2, 4}, }), ); + +class conv_fp32_multi_eltwise_2 : public ConvFusingTest {}; +TEST_P(conv_fp32_multi_eltwise_2, basic) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("eltwise_data", get_mem(get_output_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("weights", get_mem(get_weights_layout(p))), + convolution("conv_prim", "input", { "weights" }, 
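[Editor's note] The patch threads two pieces of bookkeeping through `fuse_nodes`: `fusing_history` maps a still-present node id to the ids of primitives that were fused away on its input path, and each `fused_primitive_desc` now carries `fused_deps`, the earlier fused primitives it originally consumed; `get_users_from_fusing_history` simply inverts the map to recover the original users of a fused-away primitive. The self-contained snippet below is an editor's sketch, with hypothetical simplified types rather than the real clDNN structures, showing how those containers evolve when a chain conv -> eltw1 -> act -> reorder is folded into the convolution one node at a time.

```cpp
// Illustrative sketch only: evolution of fusing_history and fused_deps across two fusions.
#include <iostream>
#include <map>
#include <string>
#include <vector>

using primitive_id = std::string;

struct fused_desc {                       // simplified stand-in for fused_primitive_desc
    primitive_id id;
    std::vector<primitive_id> fused_deps; // earlier fused prims this one consumed
};

int main() {
    // node id -> ids of primitives already fused away on its input path
    std::map<primitive_id, std::vector<primitive_id>> fusing_history;
    std::vector<fused_desc> fused_prims;  // descriptors accumulated on the conv node

    auto fuse = [&](const primitive_id& peer, const std::vector<primitive_id>& peer_users) {
        fused_desc desc{peer, {}};
        auto it = fusing_history.find(peer);   // deps recorded before peer is removed
        if (it != fusing_history.end())
            desc.fused_deps = it->second;
        fused_prims.push_back(desc);
        for (const auto& user : peer_users)    // remember peer for its former users
            fusing_history[user].push_back(peer);
    };

    fuse("eltw1", {"act"});     // first fusion: eltw1 had a single user, act
    fuse("act", {"reorder"});   // second fusion: act's history entry now names eltw1

    for (const auto& d : fused_prims) {
        std::cout << d.id << " fused_deps:";
        for (const auto& dep : d.fused_deps) std::cout << ' ' << dep;
        std::cout << '\n';      // prints "eltw1 fused_deps:" then "act fused_deps: eltw1"
    }
}
```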
{"bias"}, p.groups, p.stride, p.pad, p.dilation), + eltwise("eltwise1", "conv_prim", "eltwise_data", eltwise_mode::sum), + eltwise("eltwise2", "eltwise1", "conv_prim", eltwise_mode::prod), + reorder("reorder_bfyx", "eltwise2", p.default_format, data_types::f32) + ); + implementation_desc conv_impl = { format::b_fs_yx_fsv16, ""}; + bo_fused.set_option(build_option::force_implementations({ {"conv_prim", conv_impl} })); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_2, + ::testing::ValuesIn(std::vector{ + bc_test_params{CASE_CONV_FP32_2, 2, 4}, + bc_test_params{CASE_CONV_FP32_3, 2, 4}, + bc_test_params{CASE_CONV_FP32_4, 2, 4}, + + bc_test_params{CASE_CONV_FP16_2, 2, 4}, + bc_test_params{CASE_CONV_FP16_3, 2, 4}, + bc_test_params{CASE_CONV_FP16_4, 2, 4}, + }), ); + + +class conv_fp32_multi_eltwise_2_clamp : public ConvFusingTest {}; + +TEST_P(conv_fp32_multi_eltwise_2_clamp, basic) { + auto p = GetParam(); + + create_topologies(input_layout("input", get_input_layout(p)), + data("eltwise1_data", get_mem(get_output_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("weights", get_mem(get_weights_layout(p))), + convolution("conv_prim", "input", { "weights" }, {"bias"}, p.groups, p.stride, p.pad, p.dilation), + eltwise("eltwise1", "conv_prim", "eltwise1_data", eltwise_mode::sum), + activation("activation", "eltwise1", activation_func::clamp, {0.5f, 2.5f}), + eltwise("eltwise2", "activation", "conv_prim", eltwise_mode::prod), + reorder("reorder_bfyx", "eltwise2", p.default_format, data_types::f32) + ); + implementation_desc conv_impl = { format::b_fs_yx_fsv16, ""}; + bo_fused.set_option(build_option::force_implementations({ {"conv_prim", conv_impl} })); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_2_clamp, + ::testing::ValuesIn(std::vector{ + bc_test_params{CASE_CONV_FP32_2, 2, 5}, + bc_test_params{CASE_CONV_FP32_3, 2, 5}, + bc_test_params{CASE_CONV_FP32_4, 2, 5}, + + bc_test_params{CASE_CONV_FP16_2, 2, 5}, + bc_test_params{CASE_CONV_FP16_3, 2, 5}, + bc_test_params{CASE_CONV_FP16_4, 2, 5}, + }), ); + + +class conv_fp32_multi_eltwise_4_clamp : public ConvFusingTest {}; + +TEST_P(conv_fp32_multi_eltwise_4_clamp, basic) { + auto p = GetParam(); + + create_topologies(input_layout("input", get_input_layout(p)), + data("eltwise1_data", get_mem(get_output_layout(p))), + data("eltwise2_data", get_mem(get_output_layout(p))), + data("eltwise4_data", get_mem(get_output_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("weights", get_mem(get_weights_layout(p))), + convolution("conv_prim", "input", { "weights" }, {"bias"}, p.groups, p.stride, p.pad, p.dilation), + eltwise("eltwise1_add", "conv_prim", "eltwise1_data", eltwise_mode::sum), + activation("activation", "eltwise1_add", activation_func::clamp, {0.5f, 2.5f}), + eltwise("eltwise2_mul", "activation", "conv_prim", eltwise_mode::prod), + eltwise("eltwise3_div", "eltwise2_mul", "eltwise2_data", eltwise_mode::prod), + eltwise("eltwise4_add", "eltwise3_div", "eltwise4_data", eltwise_mode::sum), + reorder("reorder_bfyx", "eltwise4_add", p.default_format, data_types::f32) + ); + implementation_desc conv_impl = { format::b_fs_yx_fsv16, ""}; + bo_fused.set_option(build_option::force_implementations({ {"conv_prim", conv_impl} })); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_4_clamp, + ::testing::ValuesIn(std::vector{ + bc_test_params{CASE_CONV_FP32_2, 2, 7}, + 
bc_test_params{CASE_CONV_FP32_3, 2, 7}, + bc_test_params{CASE_CONV_FP32_4, 2, 7}, + + bc_test_params{CASE_CONV_FP16_2, 2, 7}, + bc_test_params{CASE_CONV_FP16_3, 2, 7}, + bc_test_params{CASE_CONV_FP16_4, 2, 7}, + }), ); + + +class conv_fp32_multi_eltwise_3_fusing : public ConvFusingTest {}; +TEST_P(conv_fp32_multi_eltwise_3_fusing, basic) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("eltwise_data1", get_mem(get_output_layout(p))), + data("eltwise_data2", get_mem(get_output_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("weights", get_mem(get_weights_layout(p))), + convolution("conv_prim", "input", { "weights" }, {"bias"}, p.groups, p.stride, p.pad, p.dilation), + eltwise("eltwise1", "conv_prim", "eltwise_data1", eltwise_mode::sum), + eltwise("eltwise2", "conv_prim", "eltwise_data2", eltwise_mode::sum), + eltwise("eltwise3", "eltwise1", "eltwise2", eltwise_mode::prod), + reorder("reorder_bfyx", "eltwise3", p.default_format, data_types::f32) + ); + implementation_desc conv_impl = { format::b_fs_yx_fsv16, ""}; + bo_fused.set_option(build_option::force_implementations({ {"conv_prim", conv_impl} })); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_3_fusing, + ::testing::ValuesIn(std::vector{ + bc_test_params{CASE_CONV_FP32_2, 2, 5}, + bc_test_params{CASE_CONV_FP32_3, 2, 5}, + bc_test_params{CASE_CONV_FP32_4, 2, 5}, + + bc_test_params{CASE_CONV_FP16_2, 2, 5}, + bc_test_params{CASE_CONV_FP16_3, 2, 5}, + bc_test_params{CASE_CONV_FP16_4, 2, 5}, + }), ); + + + +class conv_fp32_multi_eltwise_quantization : public ConvFusingTest {}; +TEST_P(conv_fp32_multi_eltwise_quantization, basic) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("weights", get_mem(get_weights_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), + data("out_lo", get_mem(get_single_element_layout(p), -127)), + data("out_hi", get_mem(get_single_element_layout(p), 127)), + data("eltwise_data1", get_mem(get_output_layout(p))), + convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation), + quantize("quantize", "conv_prim", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8), + eltwise("eltwise1", "conv_prim", "eltwise_data1", eltwise_mode::sum), + eltwise("eltwise2", "eltwise1", "quantize", eltwise_mode::prod), + reorder("reorder_bfyx", "eltwise2", p.default_format, data_types::f32) + ); + tolerance = 1.f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_quantization, + ::testing::ValuesIn(std::vector{ + bc_test_params{CASE_CONV_FP32_2, 4, 5}, + bc_test_params{CASE_CONV_FP32_4, 4, 5}, + + bc_test_params{CASE_CONV_FP16_2, 4, 5}, + bc_test_params{CASE_CONV_FP16_3, 4, 5}, + bc_test_params{CASE_CONV_FP16_4, 4, 5}, + }), ); + + +class conv_fp32_multi_eltwise_concat : public ConvFusingTest {}; +TEST_P(conv_fp32_multi_eltwise_concat, basic) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("eltwise_data1", get_mem(get_output_layout(p))), + data("eltwise_data2", get_mem(get_output_layout(p))), + data("bias", get_mem(get_bias_layout(p))), + data("weights", get_mem(get_weights_layout(p))), + convolution("conv_prim", "input", { "weights" }, {"bias"}, p.groups, p.stride, p.pad, p.dilation), + eltwise("eltwise1", 
"conv_prim", "eltwise_data1", eltwise_mode::sum), + eltwise("eltwise2", "conv_prim", "eltwise_data2", eltwise_mode::sum), + concatenation("concat", + {"eltwise1", "eltwise2"}, + concatenation::concatenation_axis::along_f, + data_types::i8, + padding{{0, 0, 0, 0}, 0}), + reorder("reorder_bfyx", "concat", p.default_format, data_types::f32) + ); + implementation_desc conv_impl = { format::b_fs_yx_fsv16, ""}; + bo_fused.set_option(build_option::force_implementations({ {"conv_prim", conv_impl} })); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_concat, + ::testing::ValuesIn(std::vector{ + bc_test_params{CASE_CONV_FP32_2, 5, 5}, + bc_test_params{CASE_CONV_FP32_3, 5, 5}, + bc_test_params{CASE_CONV_FP32_4, 5, 5}, + + bc_test_params{CASE_CONV_FP16_2, 5, 5}, + bc_test_params{CASE_CONV_FP16_3, 5, 5}, + bc_test_params{CASE_CONV_FP16_4, 5, 5}, + }), ); + class conv_fp32_eltwise_b_fs_zyx_fsv16 : public ConvFusingTest {}; TEST_P(conv_fp32_eltwise_b_fs_zyx_fsv16, vector_ops) { From e8a2ca6fb6ed73f2ed865ad741e80bc74acac74f Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 4 May 2021 12:27:48 +0300 Subject: [PATCH 33/73] Added precompiled headers to inference_engine_obj (#5490) --- .../src/inference_engine/CMakeLists.txt | 6 +++- .../subgraphs_dumper/CMakeLists.txt | 4 +-- .../subgraphs_dumper/src/precomp.hpp | 34 ------------------ .../subgraphs_dumper/tests/CMakeLists.txt | 4 +-- .../conformance/test_runner/CMakeLists.txt | 4 +-- .../conformance/test_runner/src/precomp.hpp | 36 ------------------- ngraph/test/runtime/CMakeLists.txt | 2 -- 7 files changed, 8 insertions(+), 82 deletions(-) delete mode 100644 inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/precomp.hpp delete mode 100644 inference-engine/tests/functional/plugin/conformance/test_runner/src/precomp.hpp diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 03f41078046..5fcce933c30 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -66,6 +66,10 @@ if(ENABLE_SSE42) ie_sse42_optimization_flags(sse4_2_flags) set_source_files_properties(${SSE_SRC} PROPERTIES COMPILE_FLAGS "${sse4_2_flags}") add_definitions(-DHAVE_SSE=1) + + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.16") + set_source_files_properties(${SSE_SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON) + endif() endif() addVersionDefines(ie_version.cpp CI_BUILD_NUMBER) @@ -109,7 +113,7 @@ add_library(${TARGET_NAME}_obj OBJECT ${PUBLIC_HEADERS}) ie_faster_build(${TARGET_NAME}_obj - UNITY + UNITY PCH PRIVATE "precomp.hpp" ) target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt index b1eb5bead0e..f10df452e20 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt @@ -25,6 +25,4 @@ addIeTargetTest( ADD_CPPLINT ) -ie_faster_build(${TARGET_NAME} - PCH PRIVATE "src/precomp.hpp" - ) +ie_faster_build(${TARGET_NAME} UNITY) diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/precomp.hpp b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/precomp.hpp deleted file mode 100644 index 
d6e9a611080..00000000000 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/precomp.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include diff --git a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/tests/CMakeLists.txt b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/tests/CMakeLists.txt index 802f596e17d..bea78882172 100644 --- a/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/tests/CMakeLists.txt +++ b/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/tests/CMakeLists.txt @@ -24,6 +24,4 @@ addIeTargetTest( ADD_CPPLINT ) -ie_faster_build(${TARGET_NAME} - PCH PRIVATE "${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/tests/functional/plugin/conformance/subgraphs_dumper/src/precomp.hpp" - ) +ie_faster_build(${TARGET_NAME} UNITY) diff --git a/inference-engine/tests/functional/plugin/conformance/test_runner/CMakeLists.txt b/inference-engine/tests/functional/plugin/conformance/test_runner/CMakeLists.txt index de7e630c85f..b12307aed42 100644 --- a/inference-engine/tests/functional/plugin/conformance/test_runner/CMakeLists.txt +++ b/inference-engine/tests/functional/plugin/conformance/test_runner/CMakeLists.txt @@ -27,6 +27,4 @@ addIeTargetTest( CONFORMANCE ) -ie_faster_build(${TARGET_NAME} - PCH PRIVATE "src/precomp.hpp" - ) +ie_faster_build(${TARGET_NAME} UNITY) diff --git a/inference-engine/tests/functional/plugin/conformance/test_runner/src/precomp.hpp b/inference-engine/tests/functional/plugin/conformance/test_runner/src/precomp.hpp deleted file mode 100644 index 23ba10110cf..00000000000 --- a/inference-engine/tests/functional/plugin/conformance/test_runner/src/precomp.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include -#include -#include "ngraph_functions/builders.hpp" -#include "ngraph_functions/subgraph_builders.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include diff --git a/ngraph/test/runtime/CMakeLists.txt b/ngraph/test/runtime/CMakeLists.txt index 1cdc0842928..be84499ee0e 100644 --- a/ngraph/test/runtime/CMakeLists.txt +++ b/ngraph/test/runtime/CMakeLists.txt @@ -38,8 +38,6 @@ set (SRC pass/shape_relevance.hpp ) -disable_deprecated_warnings() - add_library(ngraph_backend SHARED ${SRC}) if(COMMAND ie_faster_build) From d716db4acc6eba66ee7e71da6d87361bf4ce5ef3 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 4 May 2021 14:04:21 +0300 Subject: [PATCH 34/73] Extend Python nGraph API with Einsum-7 and test nGraph reader (#5486) * Extend nGraph Python API and test IE IR reader for Einsum Signed-off-by: Roman Kazantsev * Format description for test auxiliary function Signed-off-by: Roman Kazantsev * Remove print from the python test Signed-off-by: Roman Kazantsev --- .../ngraph_reader/einsum_tests.cpp | 272 ++++++++++++++++++ ngraph/python/src/ngraph/__init__.py | 1 + ngraph/python/src/ngraph/opset7/__init__.py | 1 + 
ngraph/python/src/ngraph/opset7/ops.py | 22 +- ngraph/python/tests/__init__.py | 2 + .../python/tests/test_ngraph/test_einsum.py | 98 +++++++ 6 files changed, 394 insertions(+), 2 deletions(-) create mode 100644 inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp create mode 100644 ngraph/python/tests/test_ngraph/test_einsum.py diff --git a/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp b/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp new file mode 100644 index 00000000000..753aff586cb --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/ngraph_reader/einsum_tests.cpp @@ -0,0 +1,272 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "ngraph_reader_tests.hpp" +#include "common_test_utils/xml_net_builder/ir_net.hpp" + +TEST_F(NGraphReaderTests, ReadEinsumNetwork) { + std::string model = R"V0G0N( + + + + + + + 2 + 3 + 4 + + + + + + + + 5 + 3 + 4 + + + + + + + + 2 + 3 + 4 + + + 5 + 3 + 4 + + + + + 2 + 5 + + + + + + + 2 + 5 + + + + + + + + + + +)V0G0N"; + std::string modelV7 = R"V0G0N( + + + + + + 2 + 3 + 4 + + + + + + + 5 + 3 + 4 + + + + + + + + 2 + 3 + 4 + + + 5 + 3 + 4 + + + + + 2 + 5 + + + + + + + + + +)V0G0N"; + compareIRs(model, modelV7); +} + +TEST_F(NGraphReaderTests, ReadEinsumNetwork2) { + std::string model = R"V0G0N( + + + + + + + 2 + 3 + 4 + 5 + + + + + + + + 4 + 5 + 6 + + + + + + + + 7 + 4 + 5 + + + + + + + + 2 + 3 + 4 + 5 + + + 4 + 5 + 6 + + + 7 + 4 + 5 + + + + + 2 + 3 + 6 + + + + + + + 2 + 3 + 6 + + + + + + + + + + + +)V0G0N"; + std::string modelV7 = R"V0G0N( + + + + + + 2 + 3 + 4 + 5 + + + + + + + 4 + 5 + 6 + + + + + + + 7 + 4 + 5 + + + + + + + + 2 + 3 + 4 + 5 + + + 4 + 5 + 6 + + + 7 + 4 + 5 + + + + + 2 + 3 + 6 + + + + + + + + + + +)V0G0N"; + compareIRs(model, modelV7); +} + diff --git a/ngraph/python/src/ngraph/__init__.py b/ngraph/python/src/ngraph/__init__.py index 441392d4f0a..c66e3ee81e0 100644 --- a/ngraph/python/src/ngraph/__init__.py +++ b/ngraph/python/src/ngraph/__init__.py @@ -53,6 +53,7 @@ from ngraph.opset7 import deformable_psroi_pooling from ngraph.opset7 import depth_to_space from ngraph.opset7 import detection_output from ngraph.opset7 import divide +from ngraph.opset7 import einsum from ngraph.opset7 import elu from ngraph.opset7 import embedding_bag_offsets_sum from ngraph.opset7 import embedding_bag_packed_sum diff --git a/ngraph/python/src/ngraph/opset7/__init__.py b/ngraph/python/src/ngraph/opset7/__init__.py index a7d12fb6f02..c1ded5f9ad4 100644 --- a/ngraph/python/src/ngraph/opset7/__init__.py +++ b/ngraph/python/src/ngraph/opset7/__init__.py @@ -38,6 +38,7 @@ from ngraph.opset1.ops import deformable_psroi_pooling from ngraph.opset1.ops import depth_to_space from ngraph.opset1.ops import detection_output from ngraph.opset1.ops import divide +from ngraph.opset7.ops import einsum from ngraph.opset1.ops import elu from ngraph.opset3.ops import embedding_bag_offsets_sum from ngraph.opset3.ops import embedding_bag_packed_sum diff --git a/ngraph/python/src/ngraph/opset7/ops.py b/ngraph/python/src/ngraph/opset7/ops.py index dee2c5d3192..419ac419fa6 100644 --- a/ngraph/python/src/ngraph/opset7/ops.py +++ b/ngraph/python/src/ngraph/opset7/ops.py @@ -2,11 +2,10 @@ # SPDX-License-Identifier: Apache-2.0 """Factory functions for all ngraph ops.""" +from functools import partial from typing import Callable, Iterable, List, Optional, Set, Union import numpy as np -from functools import 
partial - from ngraph.impl import Node, Shape from ngraph.impl.op import Constant, Parameter from ngraph.opset_utils import _get_node_factory @@ -42,9 +41,28 @@ from ngraph.utils.types import ( _get_node_factory_opset7 = partial(_get_node_factory, "opset7") + # -------------------------------------------- ops ------------------------------------------------ +@nameable_op +def einsum( + inputs: List[Node], + equation: str +) -> Node: + """Return a node which performs Einsum operation. + + @param inputs: The list of input nodes + @param equation: Einsum equation + @return: The new node performing Einsum operation on the inputs + """ + attributes = { + "equation": equation + } + + return _get_node_factory_opset7().create("Einsum", as_nodes(*inputs), attributes) + + @nameable_op def gelu( data: Node, diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 1a5925e56bf..e579d02dfa0 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -159,3 +159,5 @@ xfail_issue_52463 = xfail_test(reason="test_operator_add_size1_singleton_broadca "Not equal to tolerance") xfail_issue_49391 = xfail_test(reason="Roll is not implemented in CPU plugin.") + +xfail_issue_45432 = xfail_test(reason="Einsum is not implemented in CPU plugin.") diff --git a/ngraph/python/tests/test_ngraph/test_einsum.py b/ngraph/python/tests/test_ngraph/test_einsum.py new file mode 100644 index 00000000000..a89b6c3ff35 --- /dev/null +++ b/ngraph/python/tests/test_ngraph/test_einsum.py @@ -0,0 +1,98 @@ +import ngraph as ng +import numpy as np +import pytest + +from ngraph.utils.types import get_element_type +from tests import xfail_issue_45432 +from tests.runtime import get_runtime + + +def einsum_op_exec(input_shapes: list, equation: str, data_type: np.dtype, + with_value=False, seed=202104): + """Test Einsum operation for given input shapes, equation, and data type. + + It generates input data of given shapes and type, receives reference results using numpy, + and tests IE implementation by matching with reference numpy results. 
+ :param input_shapes: a list of tuples with shapes + :param equation: Einsum equation + :param data_type: a type of input data + :param with_value: if True - tests output data shape and type along with its value, + otherwise, tests only the output shape and type + :param seed: a seed for random generation of input data + """ + np.random.seed(seed) + num_inputs = len(input_shapes) + runtime = get_runtime() + + # set absolute tolerance based on the data type + atol = 0.0 if np.issubdtype(data_type, np.integer) else 1e-04 + + # generate input tensors + ng_inputs = [] + np_inputs = [] + for i in range(num_inputs): + input_i = np.random.random_integers(10, size=input_shapes[i]).astype(data_type) + np_inputs.append(input_i) + ng_inputs.append(ng.parameter(input_i.shape, dtype=data_type)) + + expected_result = np.einsum(equation, *np_inputs) + einsum_model = ng.einsum(ng_inputs, equation) + + # check the output shape and type + assert einsum_model.get_type_name() == "Einsum" + assert einsum_model.get_output_size() == 1 + assert list(einsum_model.get_output_shape(0)) == list(expected_result.shape) + assert einsum_model.get_output_element_type(0) == get_element_type(data_type) + + # check inference result + if with_value: + computation = runtime.computation(einsum_model, *ng_inputs) + actual_result = computation(*np_inputs) + np.allclose(actual_result, expected_result, atol=atol) + + +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_dot_product(data_type): + einsum_op_exec([5, 5], "i,i->", data_type) + + +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_matrix_multiplication(data_type): + einsum_op_exec([(2, 3), (3, 4)], "ab,bc->ac", data_type) + + +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_batch_trace(data_type): + einsum_op_exec([(2, 3, 3)], "kii->k", data_type) + + +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_diagonal_extraction(data_type): + einsum_op_exec([(6, 5, 5)], "kii->ki", data_type) + + +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_transpose(data_type): + einsum_op_exec([(1, 2, 3)], "ijk->kij", data_type) + + +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_multiple_multiplication(data_type): + einsum_op_exec([(2, 5), (5, 3, 6), (5, 3)], "ab,bcd,bc->ca", data_type) + + +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_simple_ellipsis(data_type): + einsum_op_exec([(5, 3, 4)], "a...->...", data_type) + + +@xfail_issue_45432 +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_multiple_ellipsis(data_type): + einsum_op_exec([(3, 5), 1], "a...,...->a...", data_type, with_value=True) + + +@xfail_issue_45432 +@pytest.mark.parametrize("data_type", [np.float32, np.int32]) +def test_broadcasting_ellipsis(data_type): + einsum_op_exec([(9, 1, 4, 3), (3, 11, 7, 1)], "a...b,b...->a...", data_type, with_value=True) From 2a580e7c26c803f37176a086ab9aaf9bb870a046 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Tue, 4 May 2021 14:36:28 +0300 Subject: [PATCH 35/73] Extend blobs dumping with filtering by environment variables (#5113) * Extend blobs dumping with filtering by environment variables The idea is to dump blogs without rebuilding We cannot just use environment variables without compile-time flag because of security flaws. 
Instead, it is expected that developers just always set additional macro (BLOB_DUMP_PATH) which is not set for the production builds: export CXXFLAGS="-DBLOB_DUMP_PATH=\\\"mkldnn_dump\\\"" This macro activates blob dump filtering using environment variables. To prevent unnecessary dumping, blobs are not dumped by default even if macro is defined. --- inference-engine/cmake/features.cmake | 2 + .../src/mkldnn_plugin/CMakeLists.txt | 4 + .../src/mkldnn_plugin/mkldnn_graph.cpp | 175 ++++------------ .../src/mkldnn_plugin/mkldnn_graph.h | 5 +- .../src/mkldnn_plugin/mkldnn_node.h | 1 + .../src/mkldnn_plugin/utils/README.md | 73 +++++++ .../src/mkldnn_plugin/utils/blob_dump.cpp | 8 +- .../src/mkldnn_plugin/utils/blob_dump.h | 8 +- .../mkldnn_plugin/utils/debug_capabilities.h | 10 + .../src/mkldnn_plugin/utils/node_dumper.cpp | 195 ++++++++++++++++++ .../src/mkldnn_plugin/utils/node_dumper.h | 59 ++++++ 11 files changed, 396 insertions(+), 144 deletions(-) create mode 100644 inference-engine/src/mkldnn_plugin/utils/README.md create mode 100644 inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h create mode 100644 inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp create mode 100644 inference-engine/src/mkldnn_plugin/utils/node_dumper.h diff --git a/inference-engine/cmake/features.cmake b/inference-engine/cmake/features.cmake index 54068b3b9e7..db7a0028f60 100644 --- a/inference-engine/cmake/features.cmake +++ b/inference-engine/cmake/features.cmake @@ -93,6 +93,8 @@ ie_dependent_option(ENABLE_TBB_RELEASE_ONLY "Only Release TBB libraries are link ie_option (USE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF) +ie_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" OFF) + # # Process featues # diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index fb06dc1e5fd..fa3ff943360 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -12,6 +12,10 @@ if (WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX") endif() +if (ENABLE_CPU_DEBUG_CAPS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCPU_DEBUG_CAPS") +endif() + file(GLOB_RECURSE SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index d1507ad1dad..b48f6a6f8ba 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -36,29 +36,18 @@ #include "precision_utils.h" #include -#include "utils/blob_dump.h" #include "utils/general_utils.h" +#include "utils/debug_capabilities.h" +#include "utils/node_dumper.h" /***************************************************** * Debug capability - * - BLOB_DUMP_PATH : Specify with existing folder name - * to dump intermediate blobs into it * - PRINT_GRAPH_INFO : Define it to enable printing * additional information to std output. 
* + * @todo Align with CPU_DEBUG_CAPS implementation *****************************************************/ -// #define BLOB_DUMP_PATH "mkldnn_dump" // #define PRINT_GRAPH_INFO -// #define DUMP_AS_TEXT -// #define DUMP_INTERNAL_BLOBS - -#ifdef BLOB_DUMP_PATH -# define DUMP_DIR BLOB_DUMP_PATH -# define ENABLE_DUMP(_x) { _x ;} -#else -# define DUMP_DIR "" -# define ENABLE_DUMP(_x) -#endif using namespace mkldnn; using namespace MKLDNNPlugin; @@ -365,30 +354,15 @@ void MKLDNNGraph::InitGraph() { if (!config.dumpToDot.empty()) dumpToDotFile(config.dumpToDot + "_init.dot"); -#ifndef DUMP_INTERNAL_BLOBS +#ifndef CPU_DEBUG_CAPS for (auto &graphNode : graphNodes) { graphNode->cleanup(); } #endif #if !defined(NDEBUG) && defined(PRINT_GRAPH_INFO) - for (auto &graphNode : graphNodes) { - std::cout << "name: " << graphNode->getName() << " [ "; - if (graphNode->parentEdges.size() > 0) { - auto prnt_out_desc = graphNode->parentEdges[0].lock()->getOutputDesc(); - std::cout << "in: " << prnt_out_desc.getPrecision().name() - << "/l=" << prnt_out_desc.getLayout() - << "; "; - } - if (graphNode->childEdges.size() > 0) { - auto chld_in_desc = graphNode->childEdges[0].lock()->getInputDesc(); - std::cout << "out: " << chld_in_desc.getPrecision().name() - << "/l=" << chld_in_desc.getLayout(); - } - std::cout << " ]" << std::endl; - } + printGraphInfo(); #endif - ExecuteConstantNodesOnly(); } @@ -853,6 +827,8 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { mkldnn::stream stream(eng); + ENABLE_CPU_DEBUG_CAP(NodeDumper nd(infer_count)); + for (int i = 0; i < graphNodes.size(); i++) { if (request != nullptr) { request->ThrowIfCanceled(); @@ -863,13 +839,14 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { if (batch > 0) graphNodes[i]->setDynamicBatchLim(batch); - ENABLE_DUMP(do_before(DUMP_DIR, graphNodes[i])); + ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(graphNodes[i])); if (!graphNodes[i]->isConstant()) { OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, graphNodes[i]->profiling.execute); graphNodes[i]->execute(stream); } - ENABLE_DUMP(do_after(DUMP_DIR, graphNodes[i])); + + ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(graphNodes[i])); } if (infer_count != -1) infer_count++; @@ -1197,106 +1174,6 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa return newReorder; } -void MKLDNNGraph::dumpToDotFile(std::string file) const { - std::ofstream dot; - dot.open(file); - if (!dot.is_open()) IE_THROW() << "CPU Plugin cannot create dot file " << file << "."; - - dump_graph_as_dot(*this, dot); - dot.close(); -} - -void MKLDNNGraph::do_before(const std::string &dir, const MKLDNNNodePtr &node) { - auto exec_order = std::to_string(node->execIndex); - std::string nodeName = node->name; - std::replace(nodeName.begin(), nodeName.end(), '\\', '_'); - std::replace(nodeName.begin(), nodeName.end(), '/', '_'); - std::replace(nodeName.begin(), nodeName.end(), ' ', '_'); - std::replace(nodeName.begin(), nodeName.end(), ':', '-'); - - auto num_ports = node->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size(); - for (size_t i = 0; i < num_ports; i++) { - auto prEdge = node->getParentEdgeAt(i); - auto pr = prEdge->getParent(); - - std::string file_name = nodeName; - if (infer_count != -1) file_name += "_iter" + std::to_string(infer_count); - file_name += "_in" + std::to_string(i) + ".ieb"; - if (file_name.size() > 240) - file_name = file_name.substr(file_name.size() - 240); - - - auto dump_file = dir + "/#" + exec_order + "_" + file_name; - TensorDesc desc = 
prEdge->getDesc(); - if (desc.getPrecision() == Precision::BIN) - continue; - - BlobDumper dumper(prEdge->getBlob()); - if (pr->ext_scales) dumper.withScales(pr->ext_scales); -#ifdef DUMP_AS_TEXT - dumper.dumpAsTxt(dump_file); -#else - dumper.dump(dump_file); -#endif - } - -#ifdef DUMP_INTERNAL_BLOBS - for (size_t i = 0; i < node->internalBlobs.size(); i++) { - const auto& blb = node->internalBlobs[i]; - auto dump_file = dir + "/#" + exec_order + "_" + nodeName + "_blb" + std::to_string(i) + ".ieb"; - TensorDesc desc = blb->getTensorDesc(); - if (desc.getPrecision() == Precision::BIN) - continue; - BlobDumper dumper(blb); -#ifdef DUMP_AS_TEXT - dumper.dumpAsTxt(dump_file); -#else - dumper.dump(dump_file); -#endif - } -#endif -} - -void MKLDNNGraph::do_after(const std::string &dir, const MKLDNNNodePtr &node) { - auto exec_order = std::to_string(node->execIndex); - auto nodeName = node->name; - std::replace(nodeName.begin(), nodeName.end(), '\\', '_'); - std::replace(nodeName.begin(), nodeName.end(), '/', '_'); - std::replace(nodeName.begin(), nodeName.end(), ' ', '_'); - std::replace(nodeName.begin(), nodeName.end(), ':', '-'); - - auto num_ports = node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size(); - for (size_t i = 0; i < num_ports; i++) { - auto childEdge = node->getChildEdgeAt(i); - - std::string file_name = nodeName; - if (infer_count != -1) file_name += "_iter" + std::to_string(infer_count); - file_name += "_out" + std::to_string(i) + ".ieb"; - if (file_name.size() > 240) - file_name = file_name.substr(file_name.size() - 240); - - auto dump_file = dir + "/#" + exec_order + "_" + file_name; - std::cout << "try : " << dump_file << std::endl; - - TensorDesc desc = childEdge->getDesc(); - if (desc.getPrecision() == Precision::BIN) - continue; - - BlobDumper dumper(childEdge->getBlob()); - if (node->ext_scales) dumper.withScales(node->ext_scales); - -#ifdef DUMP_AS_TEXT - dumper.dumpAsTxt(dump_file); -#else - dumper.dump(dump_file); -#endif - } -} - -InferenceEngine::CNNNetwork MKLDNNGraph::dump() const { - return dump_graph_as_ie_ngraph_net(*this); -} - bool MKLDNNGraph::InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode) { auto oIndex = edge->getOutputNum(); auto iIndex = edge->getInputNum(); @@ -1335,3 +1212,35 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo graphNodes.push_back(node); return true; } + + +InferenceEngine::CNNNetwork MKLDNNGraph::dump() const { + return dump_graph_as_ie_ngraph_net(*this); +} + +void MKLDNNGraph::dumpToDotFile(std::string file) const { + std::ofstream dot; + dot.open(file); + if (!dot.is_open()) IE_THROW() << "CPU Plugin cannot create dot file " << file << "."; + + dump_graph_as_dot(*this, dot); + dot.close(); +} + +void MKLDNNGraph::printGraphInfo() const { + for (auto &graphNode : graphNodes) { + std::cout << "name: " << graphNode->getName() << " [ "; + if (graphNode->parentEdges.size() > 0) { + auto prnt_out_desc = graphNode->parentEdges[0].lock()->getOutputDesc(); + std::cout << "in: " << prnt_out_desc.getPrecision().name() + << "/l=" << prnt_out_desc.getLayout() + << "; "; + } + if (graphNode->childEdges.size() > 0) { + auto chld_in_desc = graphNode->childEdges[0].lock()->getInputDesc(); + std::cout << "out: " << chld_in_desc.getPrecision().name() + << "/l=" << chld_in_desc.getLayout(); + } + std::cout << " ]" << std::endl; + } +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 5e8d9d7d009..ae2532dbee5 100644 --- 
a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -206,9 +206,6 @@ protected: void ExecuteConstantNodesOnly(); void SetOriginalLayerNames(); - void do_before(const std::string &dir, const MKLDNNNodePtr &node); - void do_after(const std::string &dir, const MKLDNNNodePtr &node); - friend class MKLDNNInferRequest; friend class MKLDNNGraphlessInferRequest; friend InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph); @@ -216,6 +213,8 @@ protected: private: void dumpToDotFile(std::string file) const; + void printGraphInfo() const; + struct ParsedLayer { MKLDNNNodePtr parent; InferenceEngine::CNNLayerPtr cnnLayer; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 483c315e955..1f598476081 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -592,6 +592,7 @@ protected: friend class MKLDNNEdge; friend class MKLDNNGraph; friend class MKLDNNGraphOptimizer; + friend class NodeDumper; bool isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const; bool isInitConfig(const InferenceEngine::LayerConfig& config) const; diff --git a/inference-engine/src/mkldnn_plugin/utils/README.md b/inference-engine/src/mkldnn_plugin/utils/README.md new file mode 100644 index 00000000000..af50c8d5015 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/utils/README.md @@ -0,0 +1,73 @@ +# Debug capabilities +Use the following cmake option to enable debug capabilities: + +`-DENABLE_CPU_DEBUG_CAPS=ON` + +## Blob dumping +Blob dumping is controlled by environment variables (filters). + +The variables define conditions of the node which input, output and internal blobs +should be dumped for. + +> **NOTE**: Nothing is dumped by default + +> **NOTE**: All specified filters should be matched in order blobs to be dumped + +Environment variables can be set per execution, for example: +```sh + OV_CPU_BLOB_DUMP_DIR=dump_dir binary ... +``` +or for shell session (bash example): +```sh + export OV_CPU_BLOB_DUMP_DIR=dump_dir + binary ... +``` +### Specify dump directory +```sh + OV_CPU_BLOB_DUMP_DIR= binary ... +``` +Default is *mkldnn_dump* +### Specify dump format +```sh + OV_CPU_BLOB_DUMP_FORMAT= binary ... +``` +Options are: +* BIN (default) +* TEXT + +### Filter by execution ID +To dump blobs only for node with specified execution IDs: +```sh + OV_CPU_BLOB_DUMP_NODE_EXEC_ID='' binary ... +``` +Example: +```sh + OV_CPU_BLOB_DUMP_NODE_EXEC_ID='1 12 45' binary ... +``` + +### Filter by type +To dump blobs only for node with specified type: +```sh + OV_CPU_BLOB_DUMP_NODE_TYPE= binary ... +``` +Example: +```sh + OV_CPU_BLOB_DUMP_NODE_TYPE=Convolution binary ... +``` + +> **NOTE**: see **enum Type** in [mkldnn_node.h](../mkldnn_node.h) for list of the types + +### Filter by name +To dump blobs only for node with name matching specified regex: +```sh + OV_CPU_BLOB_DUMP_NODE_NAME= binary ... +``` +Example: +```sh + OV_CPU_BLOB_DUMP_NODE_NAME=".+Fused_Add.+" binary ... +``` + +### Dump all the blobs +```sh + OV_CPU_BLOB_DUMP_NODE_NAME=".+" binary ... 
+``` diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp index 7c78c8be153..17b13034f7f 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp @@ -143,7 +143,7 @@ static Blob::Ptr prepare_plain_data(Blob::Ptr blob) { return pln_blob; } -void BlobDumper::dump(std::ostream &stream) { +void BlobDumper::dump(std::ostream &stream) const { if (!_blob) IE_THROW() << "Dumper cannot dump empty Blob"; @@ -172,7 +172,7 @@ void BlobDumper::dump(std::ostream &stream) { } } -void BlobDumper::dumpAsTxt(std::ostream &stream) { +void BlobDumper::dumpAsTxt(std::ostream &stream) const { if (!_blob) IE_THROW() << "Dumper cannot dump empty Blob"; @@ -289,7 +289,7 @@ BlobDumper BlobDumper::read(const std::string &file_path) { return res; } -void BlobDumper::dump(const std::string &dump_path) { +void BlobDumper::dump(const std::string &dump_path) const { std::ofstream dump_file; dump_file.open(dump_path); if (!dump_file.is_open()) @@ -299,7 +299,7 @@ void BlobDumper::dump(const std::string &dump_path) { dump_file.close(); } -void BlobDumper::dumpAsTxt(const std::string dump_path) { +void BlobDumper::dumpAsTxt(const std::string& dump_path) const { std::ofstream dump_file; dump_file.open(dump_path); if (!dump_file.is_open()) diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h index 18c755f7bbc..c2cc793e421 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h @@ -32,11 +32,11 @@ public: static BlobDumper read(const std::string &file_path); static BlobDumper read(std::istream &stream); - void dump(const std::string &file_path); - void dump(std::ostream &stream); + void dump(const std::string &file_path) const; + void dump(std::ostream &stream) const; - void dumpAsTxt(const std::string file_path); - void dumpAsTxt(std::ostream &stream); + void dumpAsTxt(const std::string &file_path) const; + void dumpAsTxt(std::ostream &stream) const; BlobDumper& withScales(InferenceEngine::Blob::Ptr scales); BlobDumper& withoutScales(); diff --git a/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h new file mode 100644 index 00000000000..64af835064d --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/utils/debug_capabilities.h @@ -0,0 +1,10 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#ifdef CPU_DEBUG_CAPS +# define ENABLE_CPU_DEBUG_CAP(_x) _x; +#else +# define ENABLE_CPU_DEBUG_CAP(_x) +#endif diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp new file mode 100644 index 00000000000..f69551159dc --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp @@ -0,0 +1,195 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#ifdef CPU_DEBUG_CAPS + +#include "node_dumper.h" + +#include "mkldnn_node.h" +#include "utils/blob_dump.h" + +#include "ie_common.h" +#include +#include +#include +#include + +using namespace InferenceEngine; + +namespace MKLDNNPlugin { + +NodeDumper::NodeDumper(int _count): + count(_count), dumpFormat(DUMP_FORMAT::BIN) { + const char* dumpDirEnv = std::getenv("OV_CPU_BLOB_DUMP_DIR"); + if (dumpDirEnv) + dumpDirName = dumpDirEnv; + + const char* dumpFormatEnv = 
std::getenv("OV_CPU_BLOB_DUMP_FORMAT"); + if (dumpFormatEnv) + dumpFormat = parseDumpFormat(dumpFormatEnv); + + const char* filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID"); + if (filter) + dumpFilters[FILTER::BY_EXEC_ID] = filter; + + filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_TYPE"); + if (filter) + dumpFilters[FILTER::BY_TYPE] = filter; + + filter = std::getenv("OV_CPU_BLOB_DUMP_NODE_NAME"); + if (filter) + dumpFilters[FILTER::BY_NAME] = filter; +} + +void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const { + if (!shouldBeDumped(node)) + return; + + auto exec_order = std::to_string(node->getExecIndex()); + std::string nodeName = node->getName(); + formatNodeName(nodeName); + + auto num_ports = node->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size(); + for (size_t i = 0; i < num_ports; i++) { + auto prEdge = node->getParentEdgeAt(i); + auto pr = prEdge->getParent(); + + std::string file_name = NameFromType(node->getType()) + "_" + nodeName; + if (count != -1) + file_name += "_iter" + std::to_string(count); + file_name += "_in" + std::to_string(i) + ".ieb"; + if (file_name.size() > 240) + file_name = file_name.substr(file_name.size() - 240); + + auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; + std::cout << "Dump before: " << dump_file << std::endl; + + TensorDesc desc = prEdge->getDesc(); + if (desc.getPrecision() == Precision::BIN) + continue; + + BlobDumper dumper(prEdge->getBlob()); + if (pr->ext_scales) + dumper.withScales(pr->ext_scales); + + dump(dumper, dump_file); + } + + dumpInternalBlobs(node); +} + +void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const { + if (!shouldBeDumped(node)) + return; + + auto exec_order = std::to_string(node->getExecIndex()); + std::string nodeName = node->getName(); + formatNodeName(nodeName); + + auto num_ports = node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size(); + for (size_t i = 0; i < num_ports; i++) { + auto childEdge = node->getChildEdgeAt(i); + + std::string file_name = NameFromType(node->getType()) + "_" + nodeName; + if (count != -1) + file_name += "_iter" + std::to_string(count); + file_name += "_out" + std::to_string(i) + ".ieb"; + if (file_name.size() > 240) + file_name = file_name.substr(file_name.size() - 240); + + auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; + std::cout << "Dump after: " << dump_file << std::endl; + + TensorDesc desc = childEdge->getDesc(); + if (desc.getPrecision() == Precision::BIN) + continue; + + BlobDumper dumper(childEdge->getBlob()); + if (node->ext_scales) + dumper.withScales(node->ext_scales); + + dump(dumper, dump_file); + } +} + +void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const { + std::string nodeName = node->getName(); + formatNodeName(nodeName); + + for (size_t i = 0; i < node->internalBlobs.size(); i++) { + const auto& blb = node->internalBlobs[i]; + std::string file_name = NameFromType(node->getType()) + "_" + nodeName + "_blb" + std::to_string(i) + ".ieb"; + auto dump_file = dumpDirName + "/#" + std::to_string(node->getExecIndex()) + "_" + file_name; + + TensorDesc desc = blb->getTensorDesc(); + if (desc.getPrecision() == Precision::BIN) + continue; + + BlobDumper dumper(blb); + dump(dumper, dump_file); + } +} + +void NodeDumper::dump(const BlobDumper& bd, const std::string& file) const { + switch (dumpFormat) { + case DUMP_FORMAT::BIN: { + bd.dump(file); + break; + } + case DUMP_FORMAT::TEXT: { + bd.dumpAsTxt(file); + break; + } + default: + IE_THROW() << "Unknown dump format"; + } +} 
+ +bool NodeDumper::shouldBeDumped(const MKLDNNNodePtr& node) const { + if (dumpFilters.empty()) + return false; + + if (dumpFilters.count(FILTER::BY_EXEC_ID)) { // filter by exec id env set + std::stringstream ss(dumpFilters.at(FILTER::BY_EXEC_ID)); + int id; + bool matched = false; + while (ss >> id) { + if (node->getExecIndex() == id) // exec id matches + matched = true; + } + + if (!matched) + return false; + } + + if (dumpFilters.count(FILTER::BY_TYPE)) { // filter by type env set + if (NameFromType(node->getType()) != dumpFilters.at(FILTER::BY_TYPE)) // type does not match + return false; + } + + if (dumpFilters.count(FILTER::BY_NAME)) { // filter by name env set + if (!std::regex_match(node->getName(), std::regex(dumpFilters.at(FILTER::BY_NAME)))) // name does not match + return false; + } + + return true; +} + +NodeDumper::DUMP_FORMAT NodeDumper::parseDumpFormat(const std::string& format) const { + if (format == "BIN") + return DUMP_FORMAT::BIN; + else if (format == "TEXT") + return DUMP_FORMAT::TEXT; + else + IE_THROW() << "Unknown dump format"; +} + +void NodeDumper::formatNodeName(std::string& name) const { + std::replace(name.begin(), name.end(), '\\', '_'); + std::replace(name.begin(), name.end(), '/', '_'); + std::replace(name.begin(), name.end(), ' ', '_'); + std::replace(name.begin(), name.end(), ':', '-'); +} + +} // namespace MKLDNNPlugin +#endif // CPU_DEBUG_CAPS diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.h b/inference-engine/src/mkldnn_plugin/utils/node_dumper.h new file mode 100644 index 00000000000..7dd1ac1f0c6 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.h @@ -0,0 +1,59 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#ifdef CPU_DEBUG_CAPS +#pragma once + +#include "mkldnn_node.h" +#include "utils/blob_dump.h" + +#include +#include + +namespace MKLDNNPlugin { + +/** + * Blobs are not dumped by default + * Blobs are dumped if node matches all specified env filters + * + * To dump blobs from all the nodes use the following filter: + * + * OV_CPU_BLOB_DUMP_NODE_NAME=.+ + */ +class NodeDumper { +public: + NodeDumper(int _count); + + void dumpInputBlobs(const MKLDNNNodePtr &node) const; + void dumpOutputBlobs(const MKLDNNNodePtr &node) const; + +private: + void dumpInternalBlobs(const MKLDNNNodePtr& node) const; + void dump(const BlobDumper& bd, const std::string& file) const; + bool shouldBeDumped(const MKLDNNNodePtr &node) const; + + enum class DUMP_FORMAT { + BIN, + TEXT, + }; + + DUMP_FORMAT parseDumpFormat(const std::string& format) const; + void formatNodeName(std::string& name) const; + + DUMP_FORMAT dumpFormat; + + int count; + + std::string dumpDirName = "mkldnn_dump"; + + enum FILTER { + BY_EXEC_ID, + BY_TYPE, + BY_NAME, + COUNT, + }; + + std::unordered_map dumpFilters; +}; +} // namespace MKLDNNPlugin +#endif // CPU_DEBUG_CAPS From d16c215fdec58e25d7c610dc8eac185e7caf1d5c Mon Sep 17 00:00:00 2001 From: Olesya Martinyuk Date: Tue, 4 May 2021 14:49:10 +0300 Subject: [PATCH 36/73] Add CPU info to time tests (#5451) --- tests/time_tests/test_runner/conftest.py | 3 ++- tests/time_tests/test_runner/utils.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/time_tests/test_runner/conftest.py b/tests/time_tests/test_runner/conftest.py index d83148644d0..cd3e8267e3f 100644 --- a/tests/time_tests/test_runner/conftest.py +++ b/tests/time_tests/test_runner/conftest.py @@ -31,7 +31,7 @@ from jsonschema import validate, ValidationError 
from scripts.run_timetest import check_positive_int from test_runner.utils import upload_timetest_data, metadata_from_manifest, get_os_name, get_os_version, \ - DATABASE, DB_COLLECTIONS + get_cpu_info, DATABASE, DB_COLLECTIONS # -------------------- CLI options -------------------- @@ -384,6 +384,7 @@ def pytest_runtest_makereport(item, call): data = item._request.test_info["db_info"].copy() data["results"] = item._request.test_info["results"].copy() data["raw_results"] = item._request.test_info["raw_results"].copy() + data["cpu_info"] = get_cpu_info() data["status"] = "not_finished" data["error_msg"] = "" diff --git a/tests/time_tests/test_runner/utils.py b/tests/time_tests/test_runner/utils.py index 7612b375676..990af4c9b6b 100644 --- a/tests/time_tests/test_runner/utils.py +++ b/tests/time_tests/test_runner/utils.py @@ -5,6 +5,7 @@ import os import platform +import subprocess import sys import distro import yaml @@ -150,3 +151,26 @@ def get_os_version(): if os_type_is_darwin(): return tuple(platform.mac_ver()[0].split(".")[:2]) raise UnsupportedOsError() + + +def get_cpu_info(): + """ + Check OS version and returns name and frequency of cpu + + :return: CPU name and frequency + :rtype: str + """ + model = '' + if os_type_is_linux(): + command = r"lscpu | sed -n 's/Model name:[ \t]*//p'" + model = subprocess.check_output(command, shell=True) + elif os_type_is_windows(): + command = 'wmic cpu get name | find /v "Name"' + model = subprocess.check_output(command, shell=True) + elif os_type_is_darwin(): + command = ['/usr/sbin/sysctl', "-n", "machdep.cpu.brand_string"] + model = subprocess.check_output(command) + else: + raise UnsupportedOsError() + info = model.decode('utf-8').strip() + return info From 895b605c06913076c7326275ced19e8af4de2891 Mon Sep 17 00:00:00 2001 From: Nico Galoppo Date: Tue, 4 May 2021 05:52:33 -0700 Subject: [PATCH 37/73] [IE CLDNN] Fix OpenCL dependency for clDNN tutorials (#5491) --- inference-engine/thirdparty/clDNN/tutorial/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference-engine/thirdparty/clDNN/tutorial/CMakeLists.txt b/inference-engine/thirdparty/clDNN/tutorial/CMakeLists.txt index 6b481a9e5f3..0c05a9ffeb9 100644 --- a/inference-engine/thirdparty/clDNN/tutorial/CMakeLists.txt +++ b/inference-engine/thirdparty/clDNN/tutorial/CMakeLists.txt @@ -55,6 +55,6 @@ set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME "${CLDNN_BUILD target_link_libraries("${CLDNN_BUILD__PROJ}" "${CLDNN_BUILD__PROJ__clDNN}" ) -target_link_libraries("${CLDNN_BUILD__PROJ}" ${CLDNN__SYSTEM_LINK_LIBRARIES} OpenCL) +target_link_libraries("${CLDNN_BUILD__PROJ}" ${CLDNN__SYSTEM_LINK_LIBRARIES} clDNN_OpenCL) # ====================================================================================================== From 866515184c6bce52157c2648f284bfba2af6556c Mon Sep 17 00:00:00 2001 From: Vladimir Zinoviev Date: Tue, 4 May 2021 16:02:27 +0300 Subject: [PATCH 38/73] [LPT] Checks to not transform layers with incorrect zero points (#4764) * [LPT] Checks to not transform layers with incorrect zero points * [LPT] Fold not transformed weights * [LPT] Minor fixes; review from #5313 --- .../include/low_precision/network_helper.hpp | 2 + .../weightable_layer_transformation.hpp | 1 + .../src/convolution.cpp | 22 ++---- .../src/group_convolution.cpp | 6 -- .../src/mat_mul.cpp | 4 + .../src/network_helper.cpp | 57 ++++++++++++++ .../src/weightable_layer_transformation.cpp | 55 +++++++++++++ .../convolution_qdq_transformation.cpp | 78 ++++++++++++++++++- 
.../convolution_transformation.cpp | 40 ++++++++++ .../src/convolution_function.cpp | 4 +- 10 files changed, 244 insertions(+), 25 deletions(-) diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index 1eba940a832..dcd10d224e7 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -21,6 +21,7 @@ #include "transformations/utils/utils.hpp" #include "common/fake_quantize_dequantization.hpp" #include "common/ie_lpt_exception.hpp" +#include "layer_transformation.hpp" namespace ngraph { namespace pass { @@ -177,6 +178,7 @@ public: static FakeQuantizeDequantizationValues createEmptyValues(const FakeQuantizeDequantization& dequantization); static bool isZeroConst(const std::shared_ptr& node); + static bool checkZeroPoint(const std::shared_ptr& node, const DataPrecision& dataPrecision = DataPrecision()); static std::shared_ptr toScalarIfPossible(std::shared_ptr node); diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp index 3bea283ca0f..94b81f2b2af 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp @@ -17,6 +17,7 @@ class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransforma public: WeightableLayerTransformation(const Params& params); bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; + bool canConvolutionBeTransformed(const TransformationContext& context, std::shared_ptr layer) const; bool isQuantized(std::shared_ptr layer, bool reshapeIsRequired) const noexcept; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp index 6e933667a08..020c8d3d070 100644 --- a/inference-engine/src/low_precision_transformations/src/convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp @@ -36,28 +36,17 @@ bool ConvolutionTransformation::isQuantized(std::shared_ptr layer) const n return WeightableLayerTransformation::isQuantized(layer, false); } + + bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { auto convolution = m.get_match_root(); - if (!WeightableLayerTransformation::canBeTransformed(context, convolution)) { - return false; - } - - FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution); - if (!canSubtractBeHandled(convolution, dequantization)) { - return false; - } - - if ((!supportAsymmetricQuantization) && getDataPrecisionOnWeights(convolution).hasZeroPoint) { - return false; - } - - if (updatePrecisions && !dequantization.empty() && !dequantization.isLowPrecision()) { + if (!canConvolutionBeTransformed(context, convolution)) { return false; } convolution = NetworkHelper::separateInStandaloneBranch(convolution); - dequantization = NetworkHelper::getDequantization(convolution); + 
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolution); { std::shared_ptr subtract; @@ -177,7 +166,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph std::shared_ptr reshapeFromWeights = as_type_ptr(convolution->input_value(1).get_node_shared_ptr()); - const auto dequantization = reshapeFromWeights == nullptr ? + dequantization = reshapeFromWeights == nullptr ? NetworkHelper::getDequantization(convolution, 1ul) : NetworkHelper::getDequantization(reshapeFromWeights); assert(!dequantization.empty()); @@ -292,7 +281,6 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph } return true; } - } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/group_convolution.cpp b/inference-engine/src/low_precision_transformations/src/group_convolution.cpp index 3c55e62cc68..8dd7b0b1ce7 100644 --- a/inference-engine/src/low_precision_transformations/src/group_convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/group_convolution.cpp @@ -18,12 +18,6 @@ GroupConvolutionTransformation::GroupConvolutionTransformation(const Params& par } void GroupConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - // question to nGraph: why it doesn't work - // addPattern( - // pass, - // context, - // make_op_pattern({ make_op_label(), make_op_label()})); - addSingleNodePattern(pass, context); } diff --git a/inference-engine/src/low_precision_transformations/src/mat_mul.cpp b/inference-engine/src/low_precision_transformations/src/mat_mul.cpp index 80e752448e4..edc7d84bfcb 100644 --- a/inference-engine/src/low_precision_transformations/src/mat_mul.cpp +++ b/inference-engine/src/low_precision_transformations/src/mat_mul.cpp @@ -188,6 +188,10 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context return false; } } + + if (!NetworkHelper::checkZeroPoint(dequantization1.subtract)) { + return false; + } } const auto dequantization2 = NetworkHelper::getDequantization(layer, 1); diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index f825d208166..59e622b35ca 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -19,6 +19,7 @@ #include #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/common/dequantization_op.hpp" +#include "low_precision/layer_transformation.hpp" namespace ngraph { namespace pass { @@ -1540,6 +1541,62 @@ std::shared_ptr NetworkHelper::toScalarIfPossible(std::shared_ptr no return NetworkHelper::toScalar(constant); } +bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const DataPrecision& dataPrecision) { + if (!node) { + return true; + } + + float min, max; + if (is_type(node)) { + const auto parent = node->get_input_node_shared_ptr(0); + const auto intNode = is_type(parent) ? 
parent : node; + const auto intType = intNode->get_input_element_type(0); + if (intType == element::u8 || intType == element::i8) { + min = DataPrecision::getMinValue(intType, 256) - 0.5f; + max = DataPrecision::getMaxValue(intType, 256) + 0.5f; + } else { + return false; + } + auto subtract1input = node->get_input_node_shared_ptr(1); + if (is_type(subtract1input)) { + return true; + } + auto subtractConst = as_type_ptr(subtract1input); + if (!subtractConst) { + subtractConst = as_type_ptr(node->get_input_node_shared_ptr(1)->get_input_node_shared_ptr(0)); + if (subtractConst == nullptr) { + return false; + } + } + const auto subtractValues = subtractConst->cast_vector(); + if (std::any_of(subtractValues.begin(), subtractValues.end(), [min, max] (const float& val) { + return (val < min) || (val > max); })) { + return false; + } + } else if (is_type(node)) { + if (!dataPrecision.hasZeroPoint) { + return true; + } + min = dataPrecision.min - 0.5f; + max = dataPrecision.max + 0.5f; + const auto quantizationDetails = QuantizationDetails::getDetails(as_type_ptr(node)); + for (size_t i = 0; i < quantizationDetails.outputIntervalsCount; ++i) { + float shift; + if (quantizationDetails.outputHighValues[i] != quantizationDetails.outputLowValues[i]) { + shift = (dataPrecision.min * quantizationDetails.outputHighValues[i] - + dataPrecision.max * quantizationDetails.outputLowValues[i]) / + (quantizationDetails.outputHighValues[i] - quantizationDetails.outputLowValues[i]); + } else { + shift = 0.f; + } + if (shift < min || shift > max) { + return false; + } + } + } + return true; +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp index 15a757fa0dc..b3651cdf231 100644 --- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -16,6 +16,61 @@ namespace low_precision { WeightableLayerTransformation::WeightableLayerTransformation(const Params& params) : LayerTransformation(params) {} +bool WeightableLayerTransformation::canConvolutionBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { + if (!WeightableLayerTransformation::canBeTransformed(context, layer)) { + return false; + } + + FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer); + if (!canSubtractBeHandled(layer, dequantization)) { + return false; + } + + if (updatePrecisions && !NetworkHelper::checkZeroPoint(dequantization.subtract)) { + return false; + } + + if (updatePrecisions && !dequantization.empty() && !dequantization.isLowPrecision()) { + return false; + } + + std::shared_ptr reshapeFromWeights = as_type_ptr(layer->get_input_node_shared_ptr(1)); + dequantization = reshapeFromWeights == nullptr ? 
+ NetworkHelper::getDequantization(layer, 1ul) : + NetworkHelper::getDequantization(reshapeFromWeights); + + if (dequantization.empty()) { + const auto fqOnWeights = getFakeQuantizeOnWeights(layer); + const auto dataPrecision = getDataPrecisionOnWeights(layer); + if ((!supportAsymmetricQuantization) && dataPrecision.hasZeroPoint) { + return false; + } + if (!NetworkHelper::checkZeroPoint(fqOnWeights, dataPrecision)) { + const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights); + if (as_type_ptr(resultConstant)) { + replace_node(fqOnWeights, resultConstant); + } + return false; + } + } else { + if (!NetworkHelper::checkZeroPoint(dequantization.subtract)) { + const auto resultDequantization = NetworkHelper::foldDequantization(dequantization.multiply, 0, true); + if (resultDequantization.empty() && reshapeFromWeights) { + const auto foldedReshape = fold( + reshapeFromWeights->get_input_node_shared_ptr(0), + reshapeFromWeights->get_input_node_shared_ptr(1), + reshapeFromWeights->get_special_zero()); + if (is_type(foldedReshape)) { + replace_node(reshapeFromWeights, foldedReshape); + } + } + return false; + } + } + + return true; +} + bool WeightableLayerTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { if (!LayerTransformation::canBeTransformed(context, layer)) { return false; diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp index 173791ad883..c52606641c4 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_qdq_transformation.cpp @@ -404,7 +404,83 @@ const std::vector testValues = { ngraph::element::f32, {{}, {}, {{ 0.0006f }, ngraph::element::f32, { 1, 1, 1, 1 }}} } - } + }, + // incorrect zero point on activations [not transformed] + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {1000.f}, element::f32, {}, false }, + { {0.02f}, element::f32, {}, false } + }, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false }, + { {0.03f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {1000.f}, element::f32, {}, false }, + { {0.02f}, element::f32, {}, false } + }, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false }, + { {0.03f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {} + } + }, + // incorrect zero point on weights [not transformed, weights folded] + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { + { ngraph::element::f32, false }, + { {1000.f}, element::f32, {}, false }, + { {0.03f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8}, + {}, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + { ngraph::element::f32, 
false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + {}, + { std::vector{ -29.94f }, ngraph::element::f32}, + {}, + ngraph::element::f32, + {} + } + }, }; INSTANTIATE_TEST_CASE_P( diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp index dcfec174dc6..4ccbc8f412a 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/convolution_transformation.cpp @@ -382,6 +382,46 @@ const std::vector testValues = { {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 1, 1, 1 }}} } }, + // incorrect zero point on activations [not transformed] + { + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + {{element::f32}, { 1000.f }, { {0.02f}, element::f32 }}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } } + }, + // ExpectedValues + { + ngraph::element::u8, + {{element::f32}, { 1000.f }, { {0.02f}, element::f32 }}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + ngraph::element::f32, + {} + } + }, + // incorrect zero point on weights [not transformed, weights folded] + { + LayerTransformation::createParamsU8I8(), + // ActualValues + { + ngraph::element::u8, + {{element::f32}, {}, { {0.02f}, element::f32 }}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 0.f }), + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { 5.f }, { 6.f } } + }, + // ExpectedValues + { + ngraph::element::u8, + {{element::f32}, {}, { {0.02f}, element::f32 }}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 5.f }), + {}, + ngraph::element::f32, + {} + } + }, }; INSTANTIATE_TEST_CASE_P( diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp index 36091dd5323..33487f5eab6 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_function.cpp @@ -244,7 +244,9 @@ std::shared_ptr ConvolutionFunction::getReference( const auto convertedWeights = convertedOutput[0].get_node_shared_ptr(); std::shared_ptr onWeights = fakeQuantizeOnWeights.empty() ? - std::dynamic_pointer_cast(weights) : + (weights->get_output_element_type(0).is_real() ? 
+ convertedWeights : + std::dynamic_pointer_cast(weights)) : ngraph::builder::makeFakeQuantize( convertedWeights->output(0), netPrecision, From b291ca8cfa319fffb15013451954aab8f18ddfc4 Mon Sep 17 00:00:00 2001 From: Gleb Kazantaev Date: Tue, 4 May 2021 16:40:20 +0300 Subject: [PATCH 39/73] Use Serialization as a default engine in MO (#5347) * Use Serialization as a default engine in MO * Added cmd option to use old serialization * Added mapping file generation * Test mapping file generation * Fix setBatchsize parameters order; fix mapping file generation * Added FrameworkNode; added method to read models with custom ops but without extensions * Added python API for read_network_without_extensions function; updated mo not to use IECore * Added read_model_without_extensions to IReader and IParser * Fix V7 IR reader * Fix pword value * Fix dllexport macro usage * Add metainfo to IR * Fix nGraph code style * Fix license header * Restore prepare_emit_ir behaviour * Fix compare_function to resolve situation when Result input port has multiple names * Update Compare Functions * Fix FrameworkNode validation * Self-review * CodeStyle check * --use_fallback -> --use_legacy_ir_generation * Sort imports in main.py * --path_to_model -> --input_model * Use logging instead of print * Code simplifucation&cleanup * Fix offline_Transformations key * Fix GeneraeMappingFile comments * Use Extension approach to work with custom ops * Fix versions check * Code clean-up * Moved FrameworkNode to inference_engine_transformations library * Fix FrameworkNode includes * Code clean-up --- .../src/openvino/inference_engine/ie_api.pyx | 6 ++ .../openvino/inference_engine/ie_api_impl.cpp | 23 ++++++ .../openvino/inference_engine/ie_api_impl.hpp | 3 + .../inference_engine/ie_api_impl_defs.pxd | 2 + .../offline_transformations_api.pyx | 4 ++ .../offline_transformations_api_impl.cpp | 7 ++ .../offline_transformations_api_impl.hpp | 2 + .../offline_transformations_api_impl_defs.pxd | 2 + .../cnn_network_ngraph_impl.cpp | 4 ++ .../inference_engine/ie_network_reader.cpp | 12 ++-- .../offline_transformations/CMakeLists.txt | 2 +- .../include/generate_mapping_file.hpp | 32 +++++++++ .../src/generate_mapping_file.cpp | 62 ++++++++++++++++ .../src/readers/ir_reader/CMakeLists.txt | 1 + .../src/readers/ir_reader/ie_ir_parser.cpp | 53 ++++++++++++++ .../src/readers/ir_reader/ie_ir_parser.hpp | 1 + .../include/ngraph_ops/framework_node.hpp | 71 +++++++++++++++++++ .../src/ngraph_ops/framework_node.cpp | 61 ++++++++++++++++ .../src/transformations/serialize.cpp | 50 +++++++++---- .../inference_engine/CMakeLists.txt | 1 + .../ir_serialization/custom_ops.cpp | 38 ++++++++++ .../common_test_utils/ngraph_test_utils.cpp | 23 +++++- .../mo/back/ie_ir_ver_2/emitter.py | 27 ++++++- .../mo/back/offline_transformations.py | 28 ++++++-- model-optimizer/mo/main.py | 57 ++++++++++++--- model-optimizer/mo/pipeline/common.py | 11 +-- model-optimizer/mo/utils/cli_parser.py | 3 + 27 files changed, 540 insertions(+), 46 deletions(-) create mode 100644 inference-engine/src/offline_transformations/include/generate_mapping_file.hpp create mode 100644 inference-engine/src/offline_transformations/src/generate_mapping_file.cpp create mode 100644 inference-engine/src/transformations/include/ngraph_ops/framework_node.hpp create mode 100644 inference-engine/src/transformations/src/ngraph_ops/framework_node.cpp diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx 
b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx index aa3dc8a0159..6dde29050c5 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx @@ -61,6 +61,12 @@ def get_version(): return C.get_version().decode() +def read_network(path_to_xml : str, path_to_bin : str): + cdef IENetwork net = IENetwork() + net.impl = C.read_network(path_to_xml.encode(), path_to_bin.encode()) + return net + + ## This class defines Tensor description cdef class TensorDesc: diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index 9e90bb7de44..66ab5ce4c4f 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp @@ -157,6 +157,29 @@ PyObject* parse_parameter(const InferenceEngine::Parameter& param) { } } +/* FrameworkNodeExtension is a temporary extension that is needed to enable FrameworkNode usage + * in IRReader for all unknown opsets and operations. To have a connection between Extension and + * IRReader we register extensions with specific version equal to "framework_node_ext" which + * triggers FrameworkNode usage + */ +class FrameworkNodeExtension : public InferenceEngine::IExtension { +public: + void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override { + static InferenceEngine::Version ExtensionDescription = {{1, 0}, "1.0", "framework_node_ext"}; + + versionInfo = &ExtensionDescription; + } + + void Unload() noexcept override {} +}; + +InferenceEnginePython::IENetwork InferenceEnginePython::read_network(std::string path_to_xml, std::string path_to_bin) { + InferenceEngine::Core core; + core.AddExtension(std::make_shared()); + auto net = core.ReadNetwork(path_to_xml, path_to_bin); + return InferenceEnginePython::IENetwork(std::make_shared(net)); +} + InferenceEnginePython::IENetwork::IENetwork(const std::string& model, const std::string& weights) { InferenceEngine::Core reader; auto net = reader.ReadNetwork(model, weights); diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp index 8fdf56b5b82..423fb9e1387 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp @@ -184,4 +184,7 @@ std::unique_ptr make_unique(Args&&... 
args) { } std::string get_version(); + +InferenceEnginePython::IENetwork read_network(std::string path_to_xml, std::string path_to_bin); + }; // namespace InferenceEnginePython diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd index 6acf8e0b755..2895bc29c4b 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd @@ -220,3 +220,5 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": cdef T*get_buffer[T](CBlob &) cdef string get_version() + + cdef IENetwork read_network(string path_to_xml, string path_to_bin) diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx index 0cd8519de26..dd7300d33dd 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api.pyx @@ -24,5 +24,9 @@ def ApplyPruningTransformation(IENetwork network): C.ApplyPruningTransformation(network.impl) +def GenerateMappingFile(IENetwork network, string path, bool extract_names): + C.GenerateMappingFile(network.impl, path, extract_names) + + def CheckAPI(): C.CheckAPI() diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp index a4a79d17739..d5c628b5b18 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp @@ -4,6 +4,7 @@ #include "offline_transformations_api_impl.hpp" +#include #include #include #include @@ -43,6 +44,12 @@ void InferenceEnginePython::ApplyPruningTransformation(InferenceEnginePython::IE manager.run_passes(network.actual->getFunction()); } +void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork network, std::string path, bool extract_names) { + ngraph::pass::Manager manager; + manager.register_pass(path, extract_names); + manager.run_passes(network.actual->getFunction()); +} + void InferenceEnginePython::CheckAPI() { std::shared_ptr f; { diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp index b56be6c08e5..81aca0622a5 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.hpp @@ -19,6 +19,8 @@ void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network); void ApplyPruningTransformation(InferenceEnginePython::IENetwork network); +void GenerateMappingFile(InferenceEnginePython::IENetwork network, std::string path, bool extract_names); + void CheckAPI(); }; // namespace InferenceEnginePython diff --git 
a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd index 803ccdd7e66..d9d50139daf 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl_defs.pxd @@ -15,4 +15,6 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi cdef void ApplyPruningTransformation(IENetwork network) + cdef void GenerateMappingFile(IENetwork network, string path, bool extract_names) + cdef void CheckAPI() \ No newline at end of file diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp index 41e040489b2..4681680db3b 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp @@ -265,6 +265,10 @@ size_t CNNNetworkNGraphImpl::getBatchSize() const noexcept { // This is not correct in general. We can follow the same semantics, but order of inputs should be // guaranteed to be the same. auto params = _ngraph_function->get_parameters(); + sort(params.begin(), params.end(), [](std::shared_ptr lhs, std::shared_ptr rhs) { + return lhs->get_friendly_name() < rhs->get_friendly_name(); + }); + for (const auto& param : params) { if (param->get_partial_shape().rank().is_dynamic()) continue; diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/ie_network_reader.cpp index b7bccd841e3..6ffe494203e 100644 --- a/inference-engine/src/inference_engine/ie_network_reader.cpp +++ b/inference-engine/src/inference_engine/ie_network_reader.cpp @@ -49,8 +49,8 @@ class Reader: public IReader { if (!FileUtils::fileExist(readersLibraryPath)) { IE_THROW() << "Please, make sure that Inference Engine ONNX reader library " - << FileUtils::fromFilePath(::FileUtils::makePluginLibraryName({}, libraryName)) << " is in " - << getIELibraryPath(); + << FileUtils::fromFilePath(::FileUtils::makePluginLibraryName({}, libraryName)) << " is in " + << getIELibraryPath(); } ptr = InferenceEngine::details::SOPointer(readersLibraryPath); }); @@ -144,8 +144,8 @@ void assertIfIRv7LikeModel(std::istream & modelStream) { } IE_THROW() << "The support of IR v" << irVersion << " has been removed from the product. " - "Please, convert the original model using the Model Optimizer which comes with this " - "version of the OpenVINO to generate supported IR version."; + "Please, convert the original model using the Model Optimizer which comes with this " + "version of the OpenVINO to generate supported IR version."; } } // namespace @@ -227,7 +227,7 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& } } IE_THROW() << "Unknown model format! Cannot find reader for model format: " << fileExt << " and read the model: " << modelPath << - ". Please check that reader library exists in your PATH."; + ". Please check that reader library exists in your PATH."; } CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector& exts) { @@ -248,4 +248,4 @@ CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weig IE_THROW() << "Unknown model format! 
Cannot find reader for the model and read it. Please check that reader library exists in your PATH."; } -} // namespace InferenceEngine +} // namespace InferenceEngine \ No newline at end of file diff --git a/inference-engine/src/offline_transformations/CMakeLists.txt b/inference-engine/src/offline_transformations/CMakeLists.txt index 3083304e887..cde9fd43f13 100644 --- a/inference-engine/src/offline_transformations/CMakeLists.txt +++ b/inference-engine/src/offline_transformations/CMakeLists.txt @@ -20,7 +20,7 @@ source_group("include" FILES ${PUBLIC_HEADERS}) add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS}) target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} inference_engine_transformations ngraph::reference - PRIVATE openvino::itt) + PRIVATE openvino::itt pugixml) target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src") diff --git a/inference-engine/src/offline_transformations/include/generate_mapping_file.hpp b/inference-engine/src/offline_transformations/include/generate_mapping_file.hpp new file mode 100644 index 00000000000..15247080fd3 --- /dev/null +++ b/inference-engine/src/offline_transformations/include/generate_mapping_file.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +namespace ngraph { +namespace pass { + +class GenerateMappingFile; + +} // namespace pass +} // namespace ngraph + +/** + * @brief Generate mapping file based on output tensor names. + */ + +class ngraph::pass::GenerateMappingFile: public ngraph::pass::FunctionPass { + std::string m_path_to_file; + bool m_extract_name; +public: + NGRAPH_RTTI_DECLARATION; + explicit GenerateMappingFile(const std::string & path, bool extract_name = true) + : m_path_to_file(path), m_extract_name(extract_name) {} + + bool run_on_function(std::shared_ptr) override; +}; diff --git a/inference-engine/src/offline_transformations/src/generate_mapping_file.cpp b/inference-engine/src/offline_transformations/src/generate_mapping_file.cpp new file mode 100644 index 00000000000..ba283b688c7 --- /dev/null +++ b/inference-engine/src/offline_transformations/src/generate_mapping_file.cpp @@ -0,0 +1,62 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "generate_mapping_file.hpp" + +#include "pugixml.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::GenerateMappingFile, "GenerateMappingFile", 0); + +bool ngraph::pass::GenerateMappingFile::run_on_function(std::shared_ptr f) { + pugi::xml_document xml_doc; + pugi::xml_node root_node = xml_doc.append_child("mapping"); + + auto add_mapping = [&](const std::string & fw_name, const std::string & fw_port_name, + const std::string & ir_name, const std::string & ir_port_name) { + auto map_node = root_node.append_child("map"); + auto framework_node = map_node.append_child("framework"); + auto ir_node = map_node.append_child("IR"); + + framework_node.append_attribute("name").set_value(fw_name.c_str()); + framework_node.append_attribute("output_port_id").set_value(fw_port_name.c_str()); + + ir_node.append_attribute("name").set_value(ir_name.c_str()); + ir_node.append_attribute("output_port_id").set_value(ir_port_name.c_str()); + }; + + auto extract_name = [](const std::string & port_name) -> std::string { + return port_name.substr(0, port_name.find(':')); + }; + + for (auto && node : f->get_ordered_ops()) { + uint64_t 
ie_port_index{node->inputs().size()}; + uint64_t ng_port_index{0}; + for (auto && output : node->outputs()) { + const auto & node_name = node->get_friendly_name(); + const auto & t = output.get_tensor_ptr(); + + for (const auto & port_name : t->get_names()) { + add_mapping(node_name, port_name, node_name, std::to_string(ie_port_index)); + + if (m_extract_name) { + for (auto &name : t->get_names()) { + add_mapping(extract_name(name), port_name, node_name, std::to_string(ie_port_index)); + } + } + } + ++ie_port_index; + ++ng_port_index; + } + } + + // save mapping file + std::ofstream mapping_file(m_path_to_file, std::ios::out); + xml_doc.save(mapping_file); + mapping_file.flush(); + return false; +} \ No newline at end of file diff --git a/inference-engine/src/readers/ir_reader/CMakeLists.txt b/inference-engine/src/readers/ir_reader/CMakeLists.txt index ce6f5ff0e93..8e41bdc1e8a 100644 --- a/inference-engine/src/readers/ir_reader/CMakeLists.txt +++ b/inference-engine/src/readers/ir_reader/CMakeLists.txt @@ -33,6 +33,7 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${NGRAPH_LIBRARIES} inference_engine_reader_api inference_engine_plugin_api inference_engine + inference_engine_transformations pugixml openvino::itt) diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index 0c29f342600..058a856a765 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -167,6 +168,8 @@ public: adapter.set(value); } + void use_framework_node(bool flag) { m_use_framework_node = flag; } + private: struct IoMap { using NodeIdToIoIndex = @@ -219,6 +222,8 @@ private: /// it will be used during Inputs/Outputs Description creation in SubGraph processing /// IoMap io_map; + + bool m_use_framework_node{false}; }; XmlDeserializer::IoMap XmlDeserializer::updated_io_map(const pugi::xml_node& node) { @@ -520,6 +525,26 @@ void XmlDeserializer::on_adapter(const std::string& name, ngraph::ValueAccessor< auto buffer = std::make_shared(data, size, weights); a->set(buffer); } + } else if (auto a = ngraph::as_type< + ngraph::AttributeAdapter>(&adapter)) { + const auto & type = XMLParseUtils::GetStrAttr(node, "type"); + const auto & version = XMLParseUtils::GetStrAttr(node, "version"); + + ngraph::op::FrameworkNodeAttrs node_attrs; + node_attrs.set_opset_name(version); + node_attrs.set_type_name(type); + + pugi::xml_node dn = node.child("data"); + + if (!dn.empty()) { + std::map attrs; + for (const auto & data_attr : dn.attributes()) { + attrs[data_attr.name()] = data_attr.as_string(); + } + node_attrs.set_attrs(attrs); + } + + a->set(node_attrs); } else { IE_THROW() << "Error IR reading. 
Attribute adapter can not be found for " << name << " parameter"; @@ -700,6 +725,13 @@ V10Parser::V10Parser::GenericLayerParams XmlDeserializer::parseGenericParams( port.dims.push_back(dim); } + ngraph::element::Type type(ngraph::element::Type_t::undefined); + // Input port hasn't precision + if (!input) { + const std::string& preStr = GetStrAttr(parentNode, "precision"); + type = InferenceEngine::details::convertPrecision(preStr); + } + port.precision = type; std::vector names; if (getParameters(parentNode, "names", names)) { for (size_t i = 0; i < names.size(); i++) { @@ -816,6 +848,18 @@ std::shared_ptr XmlDeserializer::createNode( ngraphNode = ngraphNode->clone_with_new_inputs(ngraphNode->input_values()); } + if (!ngraphNode && m_use_framework_node) { + ngraphNode = std::make_shared(inputs); + XmlDeserializer visitor(node, weights, opsets, variables); + ngraphNode->visit_attributes(visitor); + + size_t index{0}; + for (const auto & output_params : params.outputPorts) { + ngraphNode->set_output_type(index, output_params.precision, ngraph::Shape(output_params.dims)); + ++index; + } + } + if (!ngraphNode) { IE_THROW() << "Cannot create " << params.type << " layer " << params.name << " id:" << params.layerId @@ -874,6 +918,15 @@ std::shared_ptr V10Parser::parse( const pugi::xml_node& root, const Blob::CPtr& weights) { std::shared_ptr function; XmlDeserializer visitor(root, weights, opsets, variables); + bool use_framework_node{false}; + for (const auto & ext : _exts) { + InferenceEngine::Version * version = new InferenceEngine::Version(); + ext->GetVersion(const_cast(version)); + if (version->description && strcmp(version->description, "framework_node_ext") == 0) { + use_framework_node = true; + } + } + visitor.use_framework_node(use_framework_node); visitor.on_attribute("net", function); OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::V10Reader_RT, "ConstructCNNNetwork"); diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp index e8fdef8a034..57cb4f1e159 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp @@ -68,6 +68,7 @@ public: struct LayerPortData { size_t portId; SizeVector dims; + ngraph::element::Type_t precision; std::unordered_set names; }; size_t layerId; diff --git a/inference-engine/src/transformations/include/ngraph_ops/framework_node.hpp b/inference-engine/src/transformations/include/ngraph_ops/framework_node.hpp new file mode 100644 index 00000000000..ab87e7f35d7 --- /dev/null +++ b/inference-engine/src/transformations/include/ngraph_ops/framework_node.hpp @@ -0,0 +1,71 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + + +#include "ngraph/op/op.hpp" +#include "ngraph/partial_shape.hpp" + +namespace ngraph { +namespace op { + +class TRANSFORMATIONS_API FrameworkNodeAttrs { +public: + void set_opset_name(const std::string& opset_name) { m_opset_name = opset_name; } + + void set_type_name(const std::string& type_name) { m_type_name = type_name; } + + const std::string& get_opset_name() const { return m_opset_name; } + + const std::string& get_type_name() const { return m_type_name; } + + const std::map& get_attrs() const { return m_attrs; } + + void set_attrs(const std::map& attrs) { m_attrs = attrs; } + +private: + std::string m_type_name; + std::string m_opset_name; + + std::map m_attrs; +}; + +class TRANSFORMATIONS_API FrameworkNode : public Op { 
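+    // FrameworkNode is a placeholder created by the IR reader for operations from
+    // unknown opsets (enabled when an extension reporting the "framework_node_ext"
+    // version is registered). The original type name, opset version and raw string
+    // attributes are preserved in FrameworkNodeAttrs so that Serialize can emit the
+    // layer back unchanged. validate_and_infer_types() remembers the input shapes and
+    // element types seen on construction and fails if they change later, since no
+    // real shape inference is available for the unknown operation.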
+public: + NGRAPH_RTTI_DECLARATION; + + explicit FrameworkNode(const OutputVector& inputs); + + void validate_and_infer_types() override; + + bool visit_attributes(AttributeVisitor& visitor) override { + visitor.on_attribute("framework_node_attrs", m_attrs); + return true; + } + + std::shared_ptr + clone_with_new_inputs(const OutputVector& new_args) const override; + +private: + std::vector> m_inputs_desc; + + FrameworkNodeAttrs m_attrs; +}; +} // namespace op + +template <> +class TRANSFORMATIONS_API AttributeAdapter + : public DirectValueAccessor { +public: + AttributeAdapter(op::FrameworkNodeAttrs& value); + + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } +}; +} // namespace ngraph diff --git a/inference-engine/src/transformations/src/ngraph_ops/framework_node.cpp b/inference-engine/src/transformations/src/ngraph_ops/framework_node.cpp new file mode 100644 index 00000000000..567c54bf04d --- /dev/null +++ b/inference-engine/src/transformations/src/ngraph_ops/framework_node.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph_ops/framework_node.hpp" +#include "itt.hpp" + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(op::FrameworkNode, "FrameworkNode", 0); + +op::FrameworkNode::FrameworkNode(const OutputVector& inputs) + : Op(inputs) { + constructor_validate_and_infer_types(); +} + +shared_ptr op::FrameworkNode::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(FrameworkNode_clone_with_new_inputs); + check_new_args_count(this, new_args); + auto node = std::make_shared(new_args); + for (size_t i = 0; i < get_output_size(); ++i) { + node->set_output_type(i, get_output_element_type(i), get_output_partial_shape(i)); + } + return node; +} + +void op::FrameworkNode::validate_and_infer_types() { + INTERNAL_OP_SCOPE(FrameworkNode_validate_and_infer_types); + // Save initial inputs descriptors + bool initialize_input_desc = m_inputs_desc.empty(); + for (uint64_t i = 0; i < get_input_size(); i++) { + // TODO: store constant values + const auto& new_input_desc = + std::make_tuple(get_input_partial_shape(i), get_input_element_type(i)); + + if (initialize_input_desc) { + m_inputs_desc.push_back(new_input_desc); + } else { + auto get_message = [&]() { + std::stringstream out; + out << "Input descriptor for " << get_friendly_name() + << " node has been changed:" << std::endl; + out << "Before: " << std::get<0>(m_inputs_desc[i]) << ", " + << std::get<1>(m_inputs_desc[i]) << std::endl; + out << "After: " << std::get<0>(new_input_desc) << ", " + << std::get<1>(new_input_desc) << std::endl; + out << "Please specify InferenceEngine Extensions to support this case."; + return out.str(); + }; + + NODE_VALIDATION_CHECK(this, m_inputs_desc[i] == new_input_desc, get_message()); + } + } +} + +constexpr DiscreteTypeInfo AttributeAdapter::type_info; + +AttributeAdapter::AttributeAdapter( + op::FrameworkNodeAttrs& value) + : DirectValueAccessor(value) {} diff --git a/inference-engine/src/transformations/src/transformations/serialize.cpp b/inference-engine/src/transformations/src/transformations/serialize.cpp index 873eea32a14..475d04c6d55 100644 --- a/inference-engine/src/transformations/src/transformations/serialize.cpp +++ b/inference-engine/src/transformations/src/transformations/serialize.cpp @@ -13,6 +13,8 @@ #include #include "ngraph/ops.hpp" #include "ngraph/opsets/opset.hpp" 
+#include "ngraph/opsets/opset1.hpp" +#include "ngraph_ops/framework_node.hpp" #include "pugixml.hpp" #include "transformations/serialize.hpp" @@ -318,6 +320,24 @@ public: m_xml_node.append_attribute("offset").set_value(offset); m_xml_node.append_attribute("size").set_value(size); } + } else if (const auto& a = ngraph::as_type>(&adapter)) { + const auto & attrs = a->get(); + + // Update type and version attributes + pugi::xml_node layer = m_xml_node.parent(); + + auto type_attr = layer.attribute("type"); + auto version_attr = layer.attribute("version"); + + type_attr.set_value(attrs.get_type_name().c_str()); + version_attr.set_value(attrs.get_opset_name().c_str()); + + // Update node attributes in data field + for (const auto & attr : attrs.get_attrs()) { + m_xml_node.append_attribute(attr.first.c_str()).set_value(attr.second.c_str()); + } + } else { + throw ngraph_error("Unsupported attribute type for serialization: " + name); } } @@ -479,8 +499,7 @@ std::string get_opset_name( return "experimental"; } -std::string get_output_precision_name(ngraph::Output& o) { - auto elem_type = o.get_element_type(); +std::string get_precision_name(const ngraph::element::Type & elem_type) { switch (elem_type) { case ::ngraph::element::Type_t::undefined: return "UNSPECIFIED"; @@ -517,8 +536,9 @@ std::string get_output_precision_name(ngraph::Output& o) { case ::ngraph::element::Type_t::boolean: return "BOOL"; default: - NGRAPH_CHECK(false, "Unsupported precision in ", o); - return ""; + std::stringstream msg; + msg << "Unsupported precision: " << elem_type; + throw ngraph_error(msg.str()); } } @@ -677,6 +697,9 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, pugi::xml_node data = layer.append_child("data"); std::string node_type_name{node->get_type_name()}; + // must be executed before visit_attributes which can change values + layer_type_attribute.set_value(translate_type_name(node_type_name).c_str()); + // general attributes if (exec_graph) { visit_exec_graph_node(data, node_type_name, node); @@ -686,8 +709,6 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, "Visitor API is not supported in ", node); rt_info::XmlSerializer{data}.serialize(node->get_rt_info()); } - layer_type_attribute.set_value( - translate_type_name(node_type_name).c_str()); const bool data_attr_size = data.attributes().begin() == data.attributes().end(); @@ -699,21 +720,20 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, // if (node->get_input_size() > 0) { pugi::xml_node input = layer.append_child("input"); - for (auto i : node->inputs()) { + for (const auto & i : node->inputs()) { NGRAPH_CHECK(i.get_partial_shape().is_static(), "Unsupported dynamic input shape in ", node); // WA for LSTMCellv0, peephole input shall not be serialized - if (i.get_index() == 6) { - auto type_info = node->get_type_info(); - if (!strcmp(type_info.name, "LSTMCell") && type_info.version == 0) { - port_id++; - continue; - } + if (i.get_index() == 6 && dynamic_cast(node)) { + port_id++; + continue; } pugi::xml_node port = input.append_child("port"); port.append_attribute("id").set_value(port_id++); + port.append_attribute("precision") + .set_value(get_precision_name(i.get_element_type()).c_str()); for (auto d : i.get_shape()) { pugi::xml_node dim = port.append_child("dim"); dim.append_child(pugi::xml_node_type::node_pcdata) @@ -728,14 +748,14 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, // if ((node->get_output_size() > 0) && !ngraph::op::is_output(node)) { pugi::xml_node output = layer.append_child("output"); - for (auto o : node->outputs()) { + 
for (const auto & o : node->outputs()) { NGRAPH_CHECK(o.get_partial_shape().is_static(), "Unsupported dynamic output shape in ", node); pugi::xml_node port = output.append_child("port"); port.append_attribute("id").set_value(port_id++); port.append_attribute("precision") - .set_value(get_output_precision_name(o).c_str()); + .set_value(get_precision_name(o.get_element_type()).c_str()); std::string names; for (const auto& name : o.get_tensor().get_names()) { if (!names.empty()) diff --git a/inference-engine/tests/functional/inference_engine/CMakeLists.txt b/inference-engine/tests/functional/inference_engine/CMakeLists.txt index 085ea6b427c..fb98af7528a 100644 --- a/inference-engine/tests/functional/inference_engine/CMakeLists.txt +++ b/inference-engine/tests/functional/inference_engine/CMakeLists.txt @@ -17,6 +17,7 @@ set(LINK_LIBRARIES sharedTestClasses inference_engine_snippets offline_transformations + inference_engine ) set(DEPENDENCIES diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/custom_ops.cpp b/inference-engine/tests/functional/inference_engine/ir_serialization/custom_ops.cpp index 01594287b82..9ad7928db2e 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/custom_ops.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/custom_ops.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "common_test_utils/ngraph_test_utils.hpp" #include "ie_core.hpp" #include "ngraph/ngraph.hpp" @@ -105,3 +106,40 @@ TEST_F(CustomOpsSerializationTest, CustomOpTransformation) { ASSERT_TRUE(success) << message; } + +class FrameworkNodeExtension : public InferenceEngine::IExtension { +public: + void GetVersion(const InferenceEngine::Version *&versionInfo) const noexcept override { + static InferenceEngine::Version ExtensionDescription = { + {1, 0}, + "1.0", + "framework_node_ext" + }; + + versionInfo = &ExtensionDescription; + } + + void Unload() noexcept override {} +}; + +TEST_F(CustomOpsSerializationTest, CustomOpNoExtensions) { + const std::string model = IR_SERIALIZATION_MODELS_PATH "custom_op.xml"; + + InferenceEngine::Core ie; + auto extension = std::make_shared(); + ie.AddExtension(extension); + auto expected = ie.ReadNetwork(model); + ngraph::pass::Manager manager; + manager.register_pass( + m_out_xml_path, m_out_bin_path, + ngraph::pass::Serialize::Version::IR_V10, extension->getOpSets()); + manager.run_passes(expected.getFunction()); + auto result = ie.ReadNetwork(m_out_xml_path, m_out_bin_path); + + bool success; + std::string message; + std::tie(success, message) = + compare_functions(result.getFunction(), expected.getFunction(), true); + + ASSERT_TRUE(success) << message; +} diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp index 94da102b606..fdfc95c0226 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/ngraph_test_utils.cpp @@ -76,7 +76,11 @@ bool less_by_name( return l->get_friendly_name() < r->get_friendly_name(); } - +bool less_by_parent_name( + const std::shared_ptr &l, + const std::shared_ptr &r) { + return l->get_input_node_shared_ptr(0)->get_friendly_name() < r->get_input_node_shared_ptr(0)->get_friendly_name(); +} std::string typeInfoToStr(const ngraph::Node::type_info_t &typeInfo) { return std::string(typeInfo.name) + "/" + to_str(typeInfo.version); @@ -550,8 +554,21 
@@ Comparator::Result Comparator::compare( auto f1_results = f1->get_results(); auto f2_results = f2->get_results(); - std::sort(f1_results.begin(), f1_results.end(), less_by_name); - std::sort(f2_results.begin(), f2_results.end(), less_by_name); + auto cmp = less_by_name; + // In case if Result source output has more than one name so the Result may have any of this names as a friendly name + // And in case of multiple names we sort Result operation using their parent node names + if (std::any_of(f1_results.begin(), f1_results.end(), [](const std::shared_ptr & node) { + const auto & t = node->input_value(0).get_tensor_ptr(); + return t->get_names().size() > 1; + }) || std::any_of(f2_results.begin(), f2_results.end(), [](const std::shared_ptr & node) { + const auto & t = node->input_value(0).get_tensor_ptr(); + return t->get_names().size() > 1; + })) { + cmp = less_by_parent_name; + } + + std::sort(f1_results.begin(), f1_results.end(), cmp); + std::sort(f2_results.begin(), f2_results.end(), cmp); if (f1_results.size() != f2_results.size()) { return Result::error( diff --git a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py index 85dce75201b..55ead23a271 100644 --- a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py +++ b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import hashlib -from xml.etree.ElementTree import Element, SubElement, tostring +from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree from defusedxml.minidom import parseString @@ -438,3 +438,28 @@ def port_renumber(graph: Graph): for v, d in node.get_sorted_outputs(): d['out'] = base base += 1 + + +def append_ir_info(file: str, meta_info: dict = dict(), mean_data: [list, None] = None, input_names: list = None): + path_to_xml = file + ".xml" + path_to_bin = file + ".bin" + + et = ElementTree() + et.parse(path_to_xml) + net = et.getroot() + + if mean_data: + mean_offset, mean_size = serialize_mean_image(path_to_bin, mean_data=mean_data) + create_pre_process_block_for_image(net, input_names, mean_offset, mean_size) + + add_meta_data(net, meta_info) + + for elem in et.iter(): + if elem.text: + elem.text = elem.text.strip() + if elem.tail: + elem.tail = elem.tail.strip() + + pretty_xml_as_string = parseString(tostring(net)).toprettyxml() + with open(path_to_xml, 'wb') as file: + file.write(bytes(pretty_xml_as_string, "UTF-8")) \ No newline at end of file diff --git a/model-optimizer/mo/back/offline_transformations.py b/model-optimizer/mo/back/offline_transformations.py index 3cd3b5a438a..22b317ab765 100644 --- a/model-optimizer/mo/back/offline_transformations.py +++ b/model-optimizer/mo/back/offline_transformations.py @@ -1,14 +1,30 @@ -#!/usr/bin/env python3 - -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import argparse + if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_model") + parser.add_argument("--framework") + args = parser.parse_args() + path_to_model = args.input_model + + # This variable is only needed by GenerateMappingFile transformation + # to produce correct mapping + extract_names = True if args.framework in ['tf', 'mxnet', 'kaldi'] else False + try: - from openvino.inference_engine import IECore # pylint: disable=import-error - from openvino.offline_transformations import ApplyMOCTransformations, CheckAPI # pylint: disable=import-error + from openvino.inference_engine 
import IECore, read_network # pylint: disable=import-error + from openvino.offline_transformations import ApplyMOCTransformations, GenerateMappingFile, CheckAPI # pylint: disable=import-error except Exception as e: print("[ WARNING ] {}".format(e)) exit(1) - CheckAPI() \ No newline at end of file + CheckAPI() + + net = read_network(path_to_model + "_tmp.xml", path_to_model + "_tmp.bin") + net.serialize(path_to_model + ".xml", path_to_model + ".bin") + path_to_mapping = path_to_model + ".mapping" + GenerateMappingFile(net, path_to_mapping.encode('utf-8'), extract_names) + diff --git a/model-optimizer/mo/main.py b/model-optimizer/mo/main.py index a1f4ca856d5..7783bea36c8 100644 --- a/model-optimizer/mo/main.py +++ b/model-optimizer/mo/main.py @@ -5,16 +5,18 @@ import argparse import datetime import logging as log import os -import sys import platform import subprocess +import sys import traceback from collections import OrderedDict +from copy import deepcopy import numpy as np import telemetry.telemetry as tm from extensions.back.SpecialNodesFinalization import RemoveConstOps, CreateConstNodesReplacement, NormalizeTI +from mo.back.ie_ir_ver_2.emitter import append_ir_info from mo.graph.graph import Graph from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively from mo.pipeline.common import prepare_emit_ir, get_ir_version @@ -23,7 +25,8 @@ from mo.utils import import_extensions from mo.utils.cli_parser import get_placeholder_shapes, get_tuple_values, get_model_name, \ get_common_cli_options, get_caffe_cli_options, get_tf_cli_options, get_mxnet_cli_options, get_kaldi_cli_options, \ get_onnx_cli_options, get_mean_scale_dictionary, parse_tuple_pairs, get_freeze_placeholder_values, get_meta_info -from mo.utils.error import Error, FrameworkError, classify_error_type +from mo.utils.error import Error, FrameworkError +from mo.utils.find_ie_version import find_ie_version from mo.utils.get_ov_update_message import get_ov_update_message from mo.utils.guess_framework import deduce_framework_by_namespace from mo.utils.logger import init_logger @@ -31,7 +34,6 @@ from mo.utils.model_analysis import AnalysisResults from mo.utils.utils import refer_to_faq_msg from mo.utils.version import get_version, get_simplified_mo_version, get_simplified_ie_version from mo.utils.versions_checker import check_requirements -from mo.utils.find_ie_version import find_ie_version def replace_ext(name: str, old: str, new: str): @@ -245,13 +247,20 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): for_graph_and_each_sub_graph_recursively(graph, RemoveConstOps().find_and_replace_pattern) for_graph_and_each_sub_graph_recursively(graph, CreateConstNodesReplacement().find_and_replace_pattern) + mean_data = deepcopy(graph.graph['mf']) if 'mf' in graph.graph else None + input_names = deepcopy(graph.graph['input_names']) if 'input_names' in graph.graph else [] + prepare_emit_ir(graph=graph, data_type=graph.graph['cmd_params'].data_type, output_dir=argv.output_dir, output_model_name=argv.model_name, - mean_data=graph.graph['mf'] if 'mf' in graph.graph else None, - input_names=graph.graph['input_names'] if 'input_names' in graph.graph else [], - meta_info=get_meta_info(argv)) + mean_data=mean_data, + input_names=input_names, + meta_info=get_meta_info(argv), + use_temporary_path=True) + + # This graph cleanup is required to avoid double memory consumption + graph.clear() if not (argv.framework == 'tf' and argv.tensorflow_custom_operations_config_update): output_dir = argv.output_dir if argv.output_dir != '.' 
else os.getcwd() @@ -261,15 +270,17 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): # This try-except is additional reinsurance that the IE # dependency search does not break the MO pipeline try: - if find_ie_version(silent=True): + if not argv.legacy_ir_generation and find_ie_version(silent=True): path_to_offline_transformations = os.path.join(os.path.realpath(os.path.dirname(__file__)), 'back', 'offline_transformations.py') - status = subprocess.run([sys.executable, path_to_offline_transformations, orig_model_name], env=os.environ, timeout=10) + status = subprocess.run([sys.executable, path_to_offline_transformations, + "--input_model", orig_model_name, + "--framework", argv.framework], env=os.environ, timeout=10) return_code = status.returncode if return_code != 0 and not argv.silent: - print("[ WARNING ] offline_transformations return code {}".format(return_code)) + log.error("offline_transformations return code {}".format(return_code), extra={'is_warning': True}) except Exception as e: - pass + log.error(e, extra={'is_warning': True}) message = str(dict({ "platform": platform.system(), @@ -281,6 +292,32 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): t = tm.Telemetry() t.send_event('mo', 'offline_transformations_status', message) + # if IR wasn't produced by offline_transformations step we need to fallback to IR + # produced by prepare_ir. This IR needs to be renamed from XXX_tmp.xml to XXX.xml + suffixes = [".xml", ".bin", ".mapping"] + if return_code != 0: + log.error("Using fallback to produce IR.", extra={'is_warning': True}) + for suf in suffixes: + # remove existing files + path_to_file = orig_model_name + suf + if os.path.exists(path_to_file): + os.remove(path_to_file) + + # rename tmp IR to original name + os.rename(orig_model_name + "_tmp" + suf, orig_model_name + suf) + else: + for suf in suffixes: + # remove existing files + path_to_file = orig_model_name + "_tmp" + suf + if os.path.exists(path_to_file): + os.remove(path_to_file) + + # add meta information to IR + append_ir_info(file=orig_model_name, + meta_info=get_meta_info(argv), + mean_data=mean_data, + input_names=input_names) + print('[ SUCCESS ] Generated IR version {} model.'.format(get_ir_version(argv))) print('[ SUCCESS ] XML file: {}.xml'.format(orig_model_name)) print('[ SUCCESS ] BIN file: {}.bin'.format(orig_model_name)) diff --git a/model-optimizer/mo/pipeline/common.py b/model-optimizer/mo/pipeline/common.py index 02e805fb62b..cd86ef26851 100644 --- a/model-optimizer/mo/pipeline/common.py +++ b/model-optimizer/mo/pipeline/common.py @@ -172,7 +172,8 @@ def convert_inputs_of_specific_ops(graph: Graph): def prepare_emit_ir(graph: Graph, data_type: str, output_dir: str, output_model_name: str, - mean_data: [list, None] = None, input_names: list = None, meta_info: dict = None): + mean_data: [list, None] = None, input_names: list = None, meta_info: dict = None, + use_temporary_path=False): if input_names is None: input_names = [] if meta_info is None: @@ -206,7 +207,9 @@ def prepare_emit_ir(graph: Graph, data_type: str, output_dir: str, output_model_ tensor_names.propagate_op_name_to_tensor(graph) - bin_file = os.path.join(output_dir, '{}.bin'.format(output_model_name)) + ir_path_suffix = "_tmp" if use_temporary_path else "" + + bin_file = os.path.join(output_dir, '{}{}.bin'.format(output_model_name, ir_path_suffix)) serialize_constants(graph, bin_file) mean_offset = None @@ -215,12 +218,12 @@ def prepare_emit_ir(graph: Graph, data_type: str, output_dir: str, output_model_ mean_offset, mean_size = 
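Editor's note: the temporary-IR bookkeeping added to emit_ir above can be summarized in a standalone sketch; finalize_ir and its arguments are illustrative names, the actual logic is the hunk shown here.

import os

def finalize_ir(orig_model_name: str, offline_ok: bool) -> None:
    # Illustrative helper: promote the "<name>_tmp.*" IR written by
    # prepare_emit_ir when the offline transformations step failed, otherwise
    # drop the temporary files because the final "<name>.*" IR already exists.
    for suf in (".xml", ".bin", ".mapping"):
        final_path = orig_model_name + suf
        tmp_path = orig_model_name + "_tmp" + suf
        if not offline_ok:
            if os.path.exists(final_path):
                os.remove(final_path)        # drop any partial output first
            os.rename(tmp_path, final_path)  # fallback: rename tmp IR
        elif os.path.exists(tmp_path):
            os.remove(tmp_path)              # success: tmp IR is redundant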
serialize_mean_image(bin_file, mean_data=mean_data) generate_ie_ir(graph=graph, - file_name=os.path.join(output_dir, '{}.xml'.format(output_model_name)), + file_name=os.path.join(output_dir, '{}{}.xml'.format(output_model_name, ir_path_suffix)), input_names=input_names, mean_offset=mean_offset, mean_size=mean_size, meta_info=meta_info) - tensor_names.output_tensor_names_map(graph, os.path.join(output_dir, '{}.mapping'.format(output_model_name))) + tensor_names.output_tensor_names_map(graph, os.path.join(output_dir, '{}{}.mapping'.format(output_model_name, ir_path_suffix))) def get_ir_version(argv: argparse.Namespace): diff --git a/model-optimizer/mo/utils/cli_parser.py b/model-optimizer/mo/utils/cli_parser.py index fecd0f8755e..b44e47fabf8 100644 --- a/model-optimizer/mo/utils/cli_parser.py +++ b/model-optimizer/mo/utils/cli_parser.py @@ -325,6 +325,9 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): common_group.add_argument('--transformations_config', help='Use the configuration file with transformations description.', action=CanonicalizePathCheckExistenceAction) + common_group.add_argument('--legacy_ir_generation', + help='Use legacy IR serialization engine', + action=DeprecatedStoreTrue, default=False) return parser From f93c5e09aafc3190c766acedf456f75d4c7b40ac Mon Sep 17 00:00:00 2001 From: Andrei Molotkov Date: Tue, 4 May 2021 17:06:57 +0300 Subject: [PATCH 40/73] [IE CLDNN] Fix bug with incompatible node and memory layouts (#5499) --- .../src/graph_optimizer/prepare_padding.cpp | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_padding.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_padding.cpp index 3030640516c..6108607dcaf 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_padding.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_padding.cpp @@ -31,6 +31,10 @@ void prepare_padding::run(program_impl& p) { format == format::b_fs_zyx_fsv32) continue; + if (prim_node.input().is_type()) { + continue; + } + auto filter_size = prim_node.weights(0).get_output_layout().size; auto needed_padding = calc_sliding_window_needed_input_padding(prim_node.input().get_output_layout(), @@ -50,6 +54,10 @@ void prepare_padding::run(program_impl& p) { if (!prim->with_output_size) continue; + if (prim_node.input().is_type()) { + continue; + } + auto filter_size = prim_node.weights(0).get_output_layout().size; auto needed_padding = calc_sliding_window_needed_input_padding(prim_node.input().get_output_layout(), @@ -69,6 +77,10 @@ void prepare_padding::run(program_impl& p) { if (!prim->with_output_size) continue; + if (prim_node.input().is_type()) { + continue; + } + padding needed_padding; // WA for this format. 
sliding window needs to be fixed --perf degradation for IncepctionV1 type models if (node->get_output_layout().format == format::b_fs_yx_fsv16) @@ -87,6 +99,10 @@ void prepare_padding::run(program_impl& p) { } else if (node->is_type()) { auto& prim_node = node->as(); + if (prim_node.input().is_type()) { + continue; + } + auto needed_padding = prim_node.input().get_output_layout().data_padding; p.apply_needed_padding(prim_node, prim_node.input(), needed_padding); @@ -127,8 +143,8 @@ void prepare_padding::run(program_impl& p) { prev_prim_output_layout.data_type != data_types::i8 && prev_prim_output_layout.data_type != data_types::u8) continue; - // We shoudn't apply any padding to nodes which are marked as outputs - if (conv_input_node.is_output()) + // We shoudn't apply any padding to nodes which are marked as outputs or have type as data + if (conv_input_node.is_output() || conv_input_node.is_type()) continue; // Calculating input padding needed for convolution @@ -183,8 +199,8 @@ void prepare_padding::run(program_impl& p) { if (conv_layout.format != cldnn::format::bfyx && conv_layout.format != cldnn::format::b_fs_yx_32fp) continue; - // We shoudn't apply any padding to nodes which are marked as outputs - if (conv_input_node.is_output()) + // We shoudn't apply any padding to nodes which are marked as outputs or have type as data + if (conv_input_node.is_output() || conv_input_node.is_type()) continue; // Calculating input padding needed for convolution From 49c9f2e6b07ab9ce6459c366e71401a0745ea46e Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 4 May 2021 20:43:04 +0300 Subject: [PATCH 41/73] Removed IE API version print (#5502) --- .../ie_bridges/c/src/ie_c_api.cpp | 4 +- .../ie_bridges/c/tests/ie_c_api_test.cpp | 4 +- .../openvino/inference_engine/ie_api_impl.cpp | 5 +- .../common/utils/include/samples/common.hpp | 69 ------------------- 4 files changed, 3 insertions(+), 79 deletions(-) diff --git a/inference-engine/ie_bridges/c/src/ie_c_api.cpp b/inference-engine/ie_bridges/c/src/ie_c_api.cpp index 2d7093b5a9c..30ef377a7b4 100644 --- a/inference-engine/ie_bridges/c/src/ie_c_api.cpp +++ b/inference-engine/ie_bridges/c/src/ie_c_api.cpp @@ -203,9 +203,7 @@ void parameter2IEparam(const IE::Parameter param, ie_param_t *ie_param) { ie_version_t ie_c_api_version(void) { auto version = IE::GetInferenceEngineVersion(); - std::string version_str = std::to_string(version->apiVersion.major) + "."; - version_str += std::to_string(version->apiVersion.minor) + "."; - version_str += version->buildNumber; + std::string version_str = version->buildNumber; ie_version_t version_res; std::unique_ptr ver(new char[version_str.length() + 1]); diff --git a/inference-engine/ie_bridges/c/tests/ie_c_api_test.cpp b/inference-engine/ie_bridges/c/tests/ie_c_api_test.cpp index f061b024bd0..c12f98013de 100644 --- a/inference-engine/ie_bridges/c/tests/ie_c_api_test.cpp +++ b/inference-engine/ie_bridges/c/tests/ie_c_api_test.cpp @@ -93,9 +93,7 @@ size_t find_device(ie_available_devices_t avai_devices, const char *device_name) TEST(ie_c_api_version, apiVersion) { ie_version_t version = ie_c_api_version(); auto ver = InferenceEngine::GetInferenceEngineVersion(); - std::string ver_str = std::to_string(ver->apiVersion.major) + "."; - ver_str += std::to_string(ver->apiVersion.minor) + "."; - ver_str += ver->buildNumber; + std::string ver_str = ver->buildNumber; EXPECT_EQ(strcmp(version.api_version, ver_str.c_str()), 0); ie_version_free(&version); diff --git 
a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index 66ab5ce4c4f..8c0697da4a7 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp @@ -456,10 +456,7 @@ IE_SUPPRESS_DEPRECATED_END std::string InferenceEnginePython::get_version() { auto version = InferenceEngine::GetInferenceEngineVersion(); - std::string version_str = std::to_string(version->apiVersion.major) + "."; - version_str += std::to_string(version->apiVersion.minor) + "."; - version_str += version->buildNumber; - return version_str; + return version->buildNumber; } InferenceEnginePython::IECore::IECore(const std::string& xmlConfigFile) { diff --git a/inference-engine/samples/common/utils/include/samples/common.hpp b/inference-engine/samples/common/utils/include/samples/common.hpp index b77b38c1ed2..3dfb63c9cbb 100644 --- a/inference-engine/samples/common/utils/include/samples/common.hpp +++ b/inference-engine/samples/common/utils/include/samples/common.hpp @@ -87,24 +87,6 @@ inline std::string fileExt(const std::string& filename) { return filename.substr(pos + 1); } -static UNUSED std::ostream& operator<<(std::ostream& os, const InferenceEngine::Version* version) { - os << "\n\tAPI version ............ "; - if (nullptr == version) { - os << "UNKNOWN"; - } else { - os << version->apiVersion.major << "." << version->apiVersion.minor; - if (nullptr != version->buildNumber) { - os << "\n\t" - << "Build .................. " << version->buildNumber; - } - if (nullptr != version->description) { - os << "\n\t" - << "Description ....... " << version->description; - } - } - return os; -} - inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Version& version) { os << "\t" << version.description << " version ......... "; os << version.apiVersion.major << "." 
<< version.apiVersion.minor; @@ -124,57 +106,6 @@ inline std::ostream& operator<<(std::ostream& os, const std::map> blobToImageOutputArray(InferenceEngine::TBlob::Ptr output, size_t* pWidth, size_t* pHeight, - size_t* pChannels) { - std::vector> outArray; - size_t W = 0, C = 0, H = 0; - - auto outputDims = output->getTensorDesc().getDims(); - if (outputDims.size() == 3) { - C = outputDims.at(0); - H = outputDims.at(1); - W = outputDims.at(2); - } else if (outputDims.size() == 4) { - C = outputDims.at(1); - H = outputDims.at(2); - W = outputDims.at(3); - } else if (outputDims.size() == 5) { - C = outputDims.at(1); - H = outputDims.at(3); - W = outputDims.at(4); - } else { - IE_THROW() << "Output blob has unsupported layout " << output->getTensorDesc().getLayout(); - } - - // Get classes - const float* outData = output->data(); - for (unsigned h = 0; h < H; h++) { - std::vector row; - for (unsigned w = 0; w < W; w++) { - float max_value = outData[h * W + w]; - size_t index = 0; - for (size_t c = 1; c < C; c++) { - size_t dataIndex = c * H * W + h * W + w; - if (outData[dataIndex] > max_value) { - index = c; - max_value = outData[dataIndex]; - } - } - row.push_back(index); - } - outArray.push_back(row); - } - - if (pWidth != nullptr) - *pWidth = W; - if (pHeight != nullptr) - *pHeight = H; - if (pChannels != nullptr) - *pChannels = C; - - return outArray; -} - /** * @class Color * @brief A Color class stores channels of a given color From ffb3a4d32b703d8e673e253780626b850872c808 Mon Sep 17 00:00:00 2001 From: Andrey Dmitriev Date: Wed, 5 May 2021 09:55:27 +0300 Subject: [PATCH 42/73] [GNA] Run caching test (#5477) --- .../plugin/gna/shared_tests_instances/skip_tests_config.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index a4f7353ba15..9f2c05ab6d1 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -62,8 +62,6 @@ std::vector disabledTestPatterns() { R"(.*CachingSupport.*_(u8|i16)_.*)", // TODO: Issue 51527 R"(.*CachingSupport.*_batch2_.*)", - // TODO: Issue 51526 - R"(.*CachingSupport.*ConvPoolRelu.*)", // TODO: Issue 51525 R"(.*CachingSupport.*KSOFunction.*)", }; From 2867aab94dd1db1821b2fdf96826f1ce91e86219 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?= Date: Wed, 5 May 2021 09:00:49 +0200 Subject: [PATCH 43/73] [IE CLDNN] Fix for a segfault discovered in ONNX Pad tests (#5444) --- inference-engine/src/cldnn_engine/ops/pad.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/inference-engine/src/cldnn_engine/ops/pad.cpp b/inference-engine/src/cldnn_engine/ops/pad.cpp index 790670ee9dd..a3503318a77 100644 --- a/inference-engine/src/cldnn_engine/ops/pad.cpp +++ b/inference-engine/src/cldnn_engine/ops/pad.cpp @@ -27,8 +27,10 @@ static std::vector GetPermuteOrder(const ngraph::CoordinateDiff& ie_ord std::vector cldnn_order(ie_order.begin(), ie_order.end()); // 1. Align to min. 4 sizes - if (cldnn_order.size() < 4) - cldnn_order.push_back(0); + if (cldnn_order.size() < 4) { + const auto zeros_to_add = 4 - ie_order.size(); + cldnn_order.insert(cldnn_order.end(), zeros_to_add, 0); + } // 2. 
Swap spatial positions for (int i = 0; i < (cldnn_order.size() - 2) / 2; i++) { From 511cddb87f2dc78153f1cd4fa898e57c1ff11160 Mon Sep 17 00:00:00 2001 From: Jan Iwaszkiewicz Date: Wed, 5 May 2021 10:15:07 +0200 Subject: [PATCH 44/73] [nGraph] Add documentation strings (#5418) --- ngraph/python/src/pyngraph/axis_set.cpp | 12 +- ngraph/python/src/pyngraph/axis_vector.cpp | 9 +- ngraph/python/src/pyngraph/dimension.cpp | 167 +++++++++++++- ngraph/python/src/pyngraph/function.cpp | 222 +++++++++++++++++-- ngraph/python/src/pyngraph/node.cpp | 186 ++++++++++++++-- ngraph/python/src/pyngraph/node_input.cpp | 66 +++++- ngraph/python/src/pyngraph/node_output.cpp | 66 +++++- ngraph/python/src/pyngraph/partial_shape.cpp | 135 ++++++++++- ngraph/python/src/pyngraph/pyngraph.cpp | 10 +- ngraph/python/src/pyngraph/shape.cpp | 9 +- ngraph/python/src/pyngraph/strides.cpp | 9 +- ngraph/python/src/pyngraph/variant.hpp | 17 +- 12 files changed, 822 insertions(+), 86 deletions(-) diff --git a/ngraph/python/src/pyngraph/axis_set.cpp b/ngraph/python/src/pyngraph/axis_set.cpp index 50eab158fa7..7fd725ede35 100644 --- a/ngraph/python/src/pyngraph/axis_set.cpp +++ b/ngraph/python/src/pyngraph/axis_set.cpp @@ -18,10 +18,14 @@ void regclass_pyngraph_AxisSet(py::module m) { py::class_> axis_set(m, "AxisSet"); axis_set.doc() = "ngraph.impl.AxisSet wraps ngraph::AxisSet"; - axis_set.def(py::init&>()); - axis_set.def(py::init&>()); - axis_set.def(py::init&>()); - axis_set.def(py::init()); + axis_set.def(py::init&>(), + py::arg("axes")); + axis_set.def(py::init&>(), + py::arg("axes")); + axis_set.def(py::init&>(), + py::arg("axes")); + axis_set.def(py::init(), + py::arg("axes")); axis_set.def("__len__", [](const ngraph::AxisSet& v) { return v.size(); }); diff --git a/ngraph/python/src/pyngraph/axis_vector.cpp b/ngraph/python/src/pyngraph/axis_vector.cpp index f93ac52e740..836e43f4ae3 100644 --- a/ngraph/python/src/pyngraph/axis_vector.cpp +++ b/ngraph/python/src/pyngraph/axis_vector.cpp @@ -15,7 +15,10 @@ void regclass_pyngraph_AxisVector(py::module m) py::class_> axis_vector(m, "AxisVector"); axis_vector.doc() = "ngraph.impl.AxisVector wraps ngraph::AxisVector"; - axis_vector.def(py::init&>()); - axis_vector.def(py::init&>()); - axis_vector.def(py::init()); + axis_vector.def(py::init&>(), + py::arg("axes")); + axis_vector.def(py::init&>(), + py::arg("axes")); + axis_vector.def(py::init(), + py::arg("axes")); } diff --git a/ngraph/python/src/pyngraph/dimension.cpp b/ngraph/python/src/pyngraph/dimension.cpp index 8e4a105c584..a25948a37f5 100644 --- a/ngraph/python/src/pyngraph/dimension.cpp +++ b/ngraph/python/src/pyngraph/dimension.cpp @@ -21,13 +21,53 @@ void regclass_pyngraph_Dimension(py::module m) py::class_> dim(m, "Dimension"); dim.doc() = "ngraph.impl.Dimension wraps ngraph::Dimension"; dim.def(py::init<>()); - dim.def(py::init()); - dim.def(py::init()); + dim.def(py::init(), + py::arg("dimension"), + R"( + Construct a static dimension. + + Parameters + ---------- + dimension : int + Value of the dimension. + )"); + dim.def(py::init(), + py::arg("min_dimension"), + py::arg("max_dimension"), + R"( + Construct a dynamic dimension with bounded range. + + Parameters + ---------- + min_dimension : int + The lower inclusive limit for the dimension. + + max_dimension : int + The upper inclusive limit for the dimension. 
+ )"); dim.def_static("dynamic", &ngraph::Dimension::dynamic); - dim.def_property_readonly("is_dynamic", &ngraph::Dimension::is_dynamic); - dim.def_property_readonly("is_static", &ngraph::Dimension::is_static); + dim.def_property_readonly("is_dynamic", + &ngraph::Dimension::is_dynamic, + R"( + Check if Dimension is dynamic. + + Returns + ---------- + is_dynamic : bool + True if dynamic, else False. + )"); + dim.def_property_readonly("is_static", + &ngraph::Dimension::is_static, + R"( + Check if Dimension is static. + + Returns + ---------- + is_static : bool + True if static, else False. + )"); dim.def( "__eq__", @@ -39,14 +79,119 @@ void regclass_pyngraph_Dimension(py::module m) py::is_operator()); dim.def("__len__", &ngraph::Dimension::get_length); - dim.def("get_length", &ngraph::Dimension::get_length); - dim.def("get_min_length", &ngraph::Dimension::get_min_length); - dim.def("get_max_length", &ngraph::Dimension::get_max_length); + dim.def("get_length", + &ngraph::Dimension::get_length, + R"( + Return this dimension as integer. + This dimension must be static and non-negative. - dim.def("same_scheme", &ngraph::Dimension::same_scheme); - dim.def("compatible", &ngraph::Dimension::compatible); - dim.def("relaxes", &ngraph::Dimension::relaxes); - dim.def("refines", &ngraph::Dimension::refines); + Returns + ---------- + get_length : int + Value of the dimension. + )"); + dim.def("get_min_length", + &ngraph::Dimension::get_min_length, + R"( + Return this dimension's min_dimension as integer. + This dimension must be dynamic and non-negative. + + Returns + ---------- + get_min_length : int + Value of the dimension. + )"); + dim.def("get_max_length", + &ngraph::Dimension::get_max_length, + R"( + Return this dimension's max_dimension as integer. + This dimension must be dynamic and non-negative. + + Returns + ---------- + get_max_length : int + Value of the dimension. + )"); + + dim.def("same_scheme", + &ngraph::Dimension::same_scheme, + py::arg("dim"), + R"( + Return this dimension's max_dimension as integer. + This dimension must be dynamic and non-negative. + + Parameters + ---------- + dim : Dimension + The other dimension to compare this dimension to. + + Returns + ---------- + same_scheme : bool + True if this dimension and dim are both dynamic, + or if they are both static and equal, otherwise False. + )"); + dim.def("compatible", + &ngraph::Dimension::compatible, + py::arg("d"), + R"( + Check whether this dimension is capable of being merged + with the argument dimension. + + Parameters + ---------- + d : Dimension + The dimension to compare this dimension with. + + Returns + ---------- + compatible : bool + True if this dimension is compatible with d, else False. + )"); + dim.def("relaxes", + &ngraph::Dimension::relaxes, + py::arg("d"), + R"( + Check whether this dimension is a relaxation of the argument. + This dimension relaxes (or is a relaxation of) d if: + + (1) this and d are static and equal + (2) this dimension contains d dimension + + this.relaxes(d) is equivalent to d.refines(this). + + Parameters + ---------- + d : Dimension + The dimension to compare this dimension with. + + Returns + ---------- + relaxes : bool + True if this dimension relaxes d, else False. + )"); + dim.def("refines", + &ngraph::Dimension::refines, + py::arg("d"), + R"( + Check whether this dimension is a refinement of the argument. 
+ This dimension refines (or is a refinement of) d if: + + (1) this and d are static and equal + (2) d dimension contains this dimension + + this.refines(d) is equivalent to d.relaxes(this). + + Parameters + ---------- + d : Dimension + The dimension to compare this dimension with. + + Returns + ---------- + relaxes : bool + True if this dimension refines d, else False. + )"); dim.def("__str__", [](const ngraph::Dimension& self) -> std::string { std::stringstream ss; diff --git a/ngraph/python/src/pyngraph/function.cpp b/ngraph/python/src/pyngraph/function.cpp index 9041208b65b..14da9757818 100644 --- a/ngraph/python/src/pyngraph/function.cpp +++ b/ngraph/python/src/pyngraph/function.cpp @@ -19,24 +19,214 @@ void regclass_pyngraph_Function(py::module m) function.doc() = "ngraph.impl.Function wraps ngraph::Function"; function.def(py::init>&, const std::vector>&, - const std::string&>()); + const std::string&>(), + py::arg("results"), + py::arg("parameters"), + py::arg("name"), + R"( + Create user-defined Function which is a representation of a model. + + Parameters + ---------- + results : List[Node] + List of Nodes to be used as results. + + parameters : List[op.Parameter] + List of parameters. + + name : str + String to set as function's freindly name. + )"); function.def(py::init&, const std::vector>&, - const std::string&>()); - function.def("get_output_size", &ngraph::Function::get_output_size); - function.def("get_ops", &ngraph::Function::get_ops); - function.def("get_ordered_ops", &ngraph::Function::get_ordered_ops); - function.def("get_output_op", &ngraph::Function::get_output_op); - function.def("get_output_element_type", &ngraph::Function::get_output_element_type); - function.def("get_output_shape", &ngraph::Function::get_output_shape); - function.def("get_output_partial_shape", &ngraph::Function::get_output_partial_shape); - function.def("get_parameters", &ngraph::Function::get_parameters); - function.def("get_results", &ngraph::Function::get_results); - function.def("get_result", &ngraph::Function::get_result); - function.def("get_name", &ngraph::Function::get_name); - function.def("get_friendly_name", &ngraph::Function::get_friendly_name); - function.def("set_friendly_name", &ngraph::Function::set_friendly_name); - function.def("is_dynamic", &ngraph::Function::is_dynamic); + const std::string&>(), + py::arg("result"), + py::arg("parameters"), + py::arg("name"), + R"( + Create user-defined Function which is a representation of a model. + + Parameters + ---------- + results : Node + Node to be used as result. + + parameters : List[op.Parameter] + List of parameters. + + name : str + String to set as function's freindly name. + )"); + function.def("get_output_size", + &ngraph::Function::get_output_size, + R"( + Return the number of outputs for the function. + + Returns + ---------- + get_output_size : int + Number of outputs. + )"); + function.def("get_ops", + &ngraph::Function::get_ops, + R"( + Return ops used in the function. + + Returns + ---------- + get_ops : List[Node] + List of Nodes representing ops used in function. + )"); + function.def("get_ordered_ops", + &ngraph::Function::get_ordered_ops, + R"( + Return ops used in the function in topological order. + + Returns + ---------- + get_ordered_ops : List[Node] + List of sorted Nodes representing ops used in function. 
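Editor's note: a minimal usage sketch of the Dimension bindings documented above, assuming the module is importable as ngraph.impl as stated in these files.

from ngraph.impl import Dimension

static_dim = Dimension(3)       # static dimension of length 3
bounded_dim = Dimension(1, 8)   # dynamic dimension bounded to [1, 8]
any_dim = Dimension.dynamic()   # fully dynamic dimension

print(static_dim.is_static, bounded_dim.is_dynamic, any_dim.is_dynamic)  # True True True
print(static_dim.get_length())                                           # 3
print(bounded_dim.get_min_length(), bounded_dim.get_max_length())        # 1 8

# A static length inside the bounds can be merged with the bounded dimension,
# so the two are compatible; the bounded dimension relaxes the static one,
# which is equivalent to the static dimension refining it.
print(static_dim.compatible(bounded_dim))  # True
print(bounded_dim.relaxes(static_dim))     # True
print(static_dim.refines(bounded_dim))     # True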
+ )"); + function.def("get_output_op", + &ngraph::Function::get_output_op, + py::arg("i"), + R"( + Return the op that generates output i + + Parameters + ---------- + i : int + output index + + Returns + ---------- + get_output_op : Node + Node object that generates output i + )"); + function.def("get_output_element_type", + &ngraph::Function::get_output_element_type, + py::arg("i"), + R"( + Return the element type of output i + + Parameters + ---------- + i : int + output index + + Returns + ---------- + get_output_op : Type + Type object of output i + )"); + function.def("get_output_shape", + &ngraph::Function::get_output_shape, + py::arg("i"), + R"( + Return the shape of element i + + Parameters + ---------- + i : int + element index + + Returns + ---------- + get_output_shape : Shape + Shape object of element i + )"); + function.def("get_output_partial_shape", + &ngraph::Function::get_output_partial_shape, + py::arg("i"), + R"( + Return the partial shape of element i + + Parameters + ---------- + i : int + element index + + Returns + ---------- + get_output_partial_shape : PartialShape + PartialShape object of element i + )"); + function.def("get_parameters", + &ngraph::Function::get_parameters, + R"( + Return the function parameters. + + Returns + ---------- + get_parameters : ParameterVector + ParameterVector containing function parameters. + )"); + function.def("get_results", + &ngraph::Function::get_results, + R"( + Return a list of function outputs. + + Returns + ---------- + get_results : ResultVector + ResultVector containing function parameters. + )"); + function.def("get_result", + &ngraph::Function::get_result, + R"( + Return single result. + + Returns + ---------- + get_result : Node + Node object representing result. + )"); + function.def("get_name", + &ngraph::Function::get_name, + R"( + Get the unique name of the function. + + Returns + ---------- + get_name : str + String with a name of the function. + )"); + function.def("get_friendly_name", + &ngraph::Function::get_friendly_name, + R"( + Gets the friendly name for a function. If no + friendly name has been set via set_friendly_name + then the function's unique name is returned. + + Returns + ---------- + get_friendly_name : str + String with a friendly name of the function. + )"); + function.def("set_friendly_name", + &ngraph::Function::set_friendly_name, + py::arg("name"), + R"( + Sets a friendly name for a function. This does + not overwrite the unique name of the function and + is retrieved via get_friendly_name(). Used mainly + for debugging. + + Parameters + ---------- + name : str + String to set as the friendly name. + )"); + function.def("is_dynamic", + &ngraph::Function::is_dynamic, + R"( + Returns true if any of the op's defined in the function + contains partial shape. 
+ + Returns + ---------- + is_dynamic : bool + )"); function.def("__repr__", [](const ngraph::Function& self) { std::string class_name = py::cast(self).get_type().attr("__name__").cast(); std::stringstream shapes_ss; diff --git a/ngraph/python/src/pyngraph/node.cpp b/ngraph/python/src/pyngraph/node.cpp index c0f069ff42d..b03a30e949c 100644 --- a/ngraph/python/src/pyngraph/node.cpp +++ b/ngraph/python/src/pyngraph/node.cpp @@ -72,26 +72,182 @@ void regclass_pyngraph_Node(py::module m) return "<" + type_name + ": '" + self.get_friendly_name() + "' (" + shapes_ss.str() + ")>"; }); - node.def("get_element_type", &ngraph::Node::get_element_type); - node.def("get_output_size", &ngraph::Node::get_output_size); - node.def("get_output_element_type", &ngraph::Node::get_output_element_type); - node.def("get_output_shape", &ngraph::Node::get_output_shape); - node.def("get_output_partial_shape", &ngraph::Node::get_output_partial_shape); - node.def("get_type_name", &ngraph::Node::get_type_name); - node.def("get_name", &ngraph::Node::get_name); - node.def("get_friendly_name", &ngraph::Node::get_friendly_name); - node.def("set_friendly_name", &ngraph::Node::set_friendly_name); - node.def("input", (ngraph::Input(ngraph::Node::*)(size_t)) & ngraph::Node::input); + node.def("get_element_type", + &ngraph::Node::get_element_type, + R"( + Checks that there is exactly one output and returns it's element type. + + Returns + ---------- + get_element_type : Type + Type of the output. + )"); + node.def("get_output_size", + &ngraph::Node::get_output_size, + R"( + Returns the number of outputs from the node. + + Returns + ---------- + get_element_type : int + Number of outputs. + )"); + node.def("get_output_element_type", + &ngraph::Node::get_output_element_type, + py::arg("i"), + R"( + Returns the element type for output i + + Parameters + ---------- + i : int + Index of the output. + + Returns + ---------- + get_output_element_type : Type + Type of the output i + )"); + node.def("get_output_shape", + &ngraph::Node::get_output_shape, + py::arg("i"), + R"( + Returns the shape for output i + + Parameters + ---------- + i : int + Index of the output. + + Returns + ---------- + get_output_shape : Shape + Shape of the output i + )"); + node.def("get_output_partial_shape", + &ngraph::Node::get_output_partial_shape, + py::arg("i"), + R"( + Returns the partial shape for output i + + Parameters + ---------- + i : int + Index of the output. + + Returns + ---------- + get_output_partial_shape : PartialShape + PartialShape of the output i + )"); + node.def("get_type_name", + &ngraph::Node::get_type_name, + R"( + Returns Type's name from the node. + + Returns + ---------- + get_type_name : str + String repesenting Type's name. + )"); + node.def("get_name", + &ngraph::Node::get_name, + R"( + Get the unique name of the node + + Returns + ---------- + get_name : str + Unique name of the node. + )"); + node.def("get_friendly_name", + &ngraph::Node::get_friendly_name, + R"( + Gets the friendly name for a node. If no friendly name has + been set via set_friendly_name then the node's unique name + is returned. + + Returns + ---------- + get_name : str + Friendly name of the node. + )"); + node.def("set_friendly_name", + &ngraph::Node::set_friendly_name, + py::arg("name"), + R"( + Sets a friendly name for a node. This does not overwrite the unique name + of the node and is retrieved via get_friendly_name(). Used mainly for + debugging. The friendly name may be set exactly once. 
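Editor's note: a short sketch exercising the Function methods documented above. It assumes the companion ngraph Python package provides the opset factories (ng.parameter, ng.relu) used to build the toy graph; those helpers are not part of this diff.

import numpy as np
import ngraph as ng
from ngraph.impl import Function

data = ng.parameter([1, 3, 4, 4], np.float32, name="data")
model = Function(ng.relu(data), [data], "toy_model")  # single-result constructor

print(model.get_friendly_name())   # "toy_model", the constructor's name argument
print(model.get_output_size())     # 1
print(model.get_output_shape(0))   # shape of the single output
print([op.get_type_name() for op in model.get_ordered_ops()])
print(model.is_dynamic())          # False: every dimension above is static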
+ + Parameters + ---------- + name : str + Friendly name to set. + )"); + node.def("input", + (ngraph::Input(ngraph::Node::*)(size_t)) & ngraph::Node::input, + py::arg("input_index"), + R"( + A handle to the input_index input of this node. + + Parameters + ---------- + input_index : int + Index of Input. + + Returns + ---------- + input : Input + Input of this node. + )"); node.def("inputs", - (std::vector>(ngraph::Node::*)()) & ngraph::Node::inputs); + (std::vector>(ngraph::Node::*)()) & ngraph::Node::inputs, + R"( + A list containing a handle for each of this node's inputs, in order. + + Returns + ---------- + inputs : List[Input] + List of node's inputs. + )"); node.def("output", - (ngraph::Output(ngraph::Node::*)(size_t)) & ngraph::Node::output); + (ngraph::Output(ngraph::Node::*)(size_t)) & ngraph::Node::output, + py::arg("output_index"), + R"( + A handle to the output_index output of this node. + + Parameters + ---------- + output_index : int + Index of Output. + + Returns + ---------- + input : Output + Output of this node. + )"); node.def("outputs", - (std::vector>(ngraph::Node::*)()) & - ngraph::Node::outputs); + (std::vector>(ngraph::Node::*)()) & ngraph::Node::outputs, + R"( + A list containing a handle for each of this node's outputs, in order. + + Returns + ---------- + inputs : List[Output] + List of node's outputs. + )"); node.def("get_rt_info", (PyRTMap & (ngraph::Node::*)()) & ngraph::Node::get_rt_info, - py::return_value_policy::reference_internal); + py::return_value_policy::reference_internal, + R"( + Returns PyRTMap which is a dictionary of user defined runtime info. + + Returns + ---------- + get_rt_info : PyRTMap + A dictionary of user defined data. + )"); node.def_property_readonly("shape", &ngraph::Node::get_shape); node.def_property_readonly("name", &ngraph::Node::get_name); diff --git a/ngraph/python/src/pyngraph/node_input.cpp b/ngraph/python/src/pyngraph/node_input.cpp index 3a4c8729ffc..b71726901d1 100644 --- a/ngraph/python/src/pyngraph/node_input.cpp +++ b/ngraph/python/src/pyngraph/node_input.cpp @@ -16,10 +16,64 @@ void regclass_pyngraph_Input(py::module m) m, "Input", py::dynamic_attr()); input.doc() = "ngraph.impl.Input wraps ngraph::Input"; - input.def("get_node", &ngraph::Input::get_node); - input.def("get_index", &ngraph::Input::get_index); - input.def("get_element_type", &ngraph::Input::get_element_type); - input.def("get_shape", &ngraph::Input::get_shape); - input.def("get_partial_shape", &ngraph::Input::get_partial_shape); - input.def("get_source_output", &ngraph::Input::get_source_output); + input.def("get_node", + &ngraph::Input::get_node, + R"( + Get node referenced by this input handle. + + Returns + ---------- + get_node : Node + Node object referenced by this input handle. + )"); + input.def("get_index", + &ngraph::Input::get_index, + R"( + The index of the input referred to by this input handle. + + Returns + ---------- + get_index : int + Index value as integer. + )"); + input.def("get_element_type", + &ngraph::Input::get_element_type, + R"( + The element type of the input referred to by this input handle. + + Returns + ---------- + get_element_type : Type + Type of the input. + )"); + input.def("get_shape", + &ngraph::Input::get_shape, + R"( + The shape of the input referred to by this input handle. + + Returns + ---------- + get_shape : Shape + Shape of the input. + )"); + input.def("get_partial_shape", + &ngraph::Input::get_partial_shape, + R"( + The partial shape of the input referred to by this input handle. 
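Editor's note: the Node, Input and Output handle methods documented in this patch can be tried with the same assumed ngraph factories; names and shapes below are illustrative.

import numpy as np
import ngraph as ng

data = ng.parameter([2, 2], np.float32, name="data")
relu = ng.relu(data)

print(relu.get_type_name(), relu.get_output_size())  # Relu 1
print(relu.get_output_shape(0))                      # the static [2, 2] shape

inp = relu.input(0)                                  # Input handle of the Relu node
print(inp.get_index())                               # 0
print(inp.get_source_output().get_node().get_friendly_name())  # "data"

out = relu.output(0)                                 # Output handle of the Relu node
print(len(out.get_target_inputs()))                  # 0: nothing consumes it yet
print(relu.get_rt_info())                            # user-defined runtime info map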
+ + Returns + ---------- + get_partial_shape : PartialShape + PartialShape of the input. + )"); + input.def("get_source_output", + &ngraph::Input::get_source_output, + R"( + A handle to the output that is connected to this input. + + Returns + ---------- + get_source_output : Output + Output that is connected to the input. + )"); } diff --git a/ngraph/python/src/pyngraph/node_output.cpp b/ngraph/python/src/pyngraph/node_output.cpp index 21bd9cbcde1..a39d7bf2527 100644 --- a/ngraph/python/src/pyngraph/node_output.cpp +++ b/ngraph/python/src/pyngraph/node_output.cpp @@ -16,10 +16,64 @@ void regclass_pyngraph_Output(py::module m) m, "Output", py::dynamic_attr()); output.doc() = "ngraph.impl.Output wraps ngraph::Output"; - output.def("get_node", &ngraph::Output::get_node); - output.def("get_index", &ngraph::Output::get_index); - output.def("get_element_type", &ngraph::Output::get_element_type); - output.def("get_shape", &ngraph::Output::get_shape); - output.def("get_partial_shape", &ngraph::Output::get_partial_shape); - output.def("get_target_inputs", &ngraph::Output::get_target_inputs); + output.def("get_node", + &ngraph::Output::get_node, + R"( + Get node referenced by this output handle. + + Returns + ---------- + get_node : Node + Node object referenced by this output handle. + )"); + output.def("get_index", + &ngraph::Output::get_index, + R"( + The index of the output referred to by this output handle. + + Returns + ---------- + get_index : int + Index value as integer. + )"); + output.def("get_element_type", + &ngraph::Output::get_element_type, + R"( + The element type of the output referred to by this output handle. + + Returns + ---------- + get_element_type : Type + Type of the output. + )"); + output.def("get_shape", + &ngraph::Output::get_shape, + R"( + The shape of the output referred to by this output handle. + + Returns + ---------- + get_shape : Shape + Shape of the output. + )"); + output.def("get_partial_shape", + &ngraph::Output::get_partial_shape, + R"( + The partial shape of the output referred to by this output handle. + + Returns + ---------- + get_partial_shape : PartialShape + PartialShape of the output. + )"); + output.def("get_target_inputs", + &ngraph::Output::get_target_inputs, + R"( + A set containing handles for all inputs targeted by the output + referenced by this output handle. + Returns + ---------- + get_target_inputs : Set[Input] + Set of Inputs. + )"); } diff --git a/ngraph/python/src/pyngraph/partial_shape.cpp b/ngraph/python/src/pyngraph/partial_shape.cpp index ce8700e9817..1a269434486 100644 --- a/ngraph/python/src/pyngraph/partial_shape.cpp +++ b/ngraph/python/src/pyngraph/partial_shape.cpp @@ -35,19 +35,130 @@ void regclass_pyngraph_PartialShape(py::module m) shape.def_static("dynamic", &ngraph::PartialShape::dynamic, py::arg("r") = ngraph::Dimension()); - shape.def_property_readonly("is_dynamic", &ngraph::PartialShape::is_dynamic); - shape.def_property_readonly("is_static", &ngraph::PartialShape::is_static); - shape.def_property_readonly("rank", &ngraph::PartialShape::rank); - shape.def_property_readonly("all_non_negative", &ngraph::PartialShape::all_non_negative); + shape.def_property_readonly("is_dynamic", + &ngraph::PartialShape::is_dynamic, + R"( + False if this shape is static, else True. + A shape is considered static if it has static rank, + and all dimensions of the shape are static. + )"); + shape.def_property_readonly("is_static", + &ngraph::PartialShape::is_static, + R"( + True if this shape is static, else False. 
+ A shape is considered static if it has static rank, + and all dimensions of the shape are static. + )"); + shape.def_property_readonly("rank", + &ngraph::PartialShape::rank, + R"( + The rank of the shape. + )"); + shape.def_property_readonly("all_non_negative", + &ngraph::PartialShape::all_non_negative, + R"( + True if all static dimensions of the tensor are + non-negative, else False. + )"); - shape.def("compatible", &ngraph::PartialShape::compatible); - shape.def("refines", &ngraph::PartialShape::refines); - shape.def("relaxes", &ngraph::PartialShape::relaxes); - shape.def("same_scheme", &ngraph::PartialShape::same_scheme); - shape.def("get_max_shape", &ngraph::PartialShape::get_max_shape); - shape.def("get_min_shape", &ngraph::PartialShape::get_min_shape); - shape.def("get_shape", &ngraph::PartialShape::get_shape); - shape.def("to_shape", &ngraph::PartialShape::to_shape); + shape.def("compatible", + &ngraph::PartialShape::compatible, + py::arg("s"), + R"( + Check whether this shape is compatible with the argument, i.e., + whether it is possible to merge them. + + Parameters + ---------- + s : PartialShape + The shape to be checked for compatibility with this shape. + + + Returns + ---------- + compatible : bool + True if this shape is compatible with s, else False. + )"); + shape.def("refines", + &ngraph::PartialShape::refines, + py::arg("s"), + R"( + Check whether this shape is a refinement of the argument. + + Parameters + ---------- + s : PartialShape + The shape which is being compared against this shape. + + Returns + ---------- + refines : bool + True if this shape refines s, else False. + )"); + shape.def("relaxes", + &ngraph::PartialShape::relaxes, + py::arg("s"), + R"( + Check whether this shape is a relaxation of the argument. + + Parameters + ---------- + s : PartialShape + The shape which is being compared against this shape. + + Returns + ---------- + relaxes : bool + True if this shape relaxes s, else False. + )"); + shape.def("same_scheme", + &ngraph::PartialShape::same_scheme, + py::arg("s"), + R"( + Check whether this shape represents the same scheme as the argument. + + Parameters + ---------- + s : PartialShape + The shape which is being compared against this shape. + + Returns + ---------- + same_scheme : bool + True if shape represents the same scheme as s, else False. + )"); + shape.def("get_max_shape", + &ngraph::PartialShape::get_max_shape, + R"( + Returns + ---------- + get_max_shape : Shape + Get the max bounding shape. + )"); + shape.def("get_min_shape", + &ngraph::PartialShape::get_min_shape, + R"( + Returns + ---------- + get_min_shape : Shape + Get the min bounding shape. + )"); + shape.def("get_shape", + &ngraph::PartialShape::get_shape, + R"( + Returns + ---------- + get_shape : Shape + Get the unique shape. + )"); + shape.def("to_shape", + &ngraph::PartialShape::to_shape, + R"( + Returns + ---------- + to_shapess : Shape + Get the unique shape. 
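Editor's note: a compact sketch of the PartialShape queries documented above; it assumes the list constructors registered elsewhere in this file accept plain integers or Dimension objects, and the concrete values are illustrative.

from ngraph.impl import Dimension, PartialShape

static_shape = PartialShape([1, 3, 224, 224])
dyn_shape = PartialShape([Dimension.dynamic(), Dimension(3), Dimension(1, 512), Dimension(1, 512)])

print(static_shape.is_static, dyn_shape.is_dynamic)  # True True
print(dyn_shape.rank)                                # rank 4

# The fully static shape refines the bounded one (each dimension fits inside),
# so the two shapes are also compatible, i.e. they can be merged.
print(static_shape.refines(dyn_shape))     # True
print(dyn_shape.relaxes(static_shape))     # True
print(static_shape.compatible(dyn_shape))  # True

print(dyn_shape.get_min_shape())           # lower bound, e.g. [0, 3, 1, 1]
print(dyn_shape.get_max_shape())           # upper bound of every dimension
print(static_shape.to_shape())             # plain Shape [1, 3, 224, 224]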
+ )"); shape.def( "__eq__", diff --git a/ngraph/python/src/pyngraph/pyngraph.cpp b/ngraph/python/src/pyngraph/pyngraph.cpp index ebd80b31bb0..92b507b6483 100644 --- a/ngraph/python/src/pyngraph/pyngraph.cpp +++ b/ngraph/python/src/pyngraph/pyngraph.cpp @@ -36,20 +36,19 @@ PYBIND11_MODULE(_pyngraph, m) { m.doc() = "Package ngraph.impl that wraps nGraph's namespace ngraph"; regclass_pyngraph_PyRTMap(m); + regmodule_pyngraph_types(m); + regclass_pyngraph_Dimension(m); // Dimension must be registered before PartialShape + regclass_pyngraph_Shape(m); + regclass_pyngraph_PartialShape(m); regclass_pyngraph_Node(m); regclass_pyngraph_Input(m); regclass_pyngraph_Output(m); regclass_pyngraph_NodeFactory(m); - regclass_pyngraph_Dimension(m); // Dimension must be registered before PartialShape - regclass_pyngraph_PartialShape(m); - regclass_pyngraph_Shape(m); regclass_pyngraph_Strides(m); regclass_pyngraph_CoordinateDiff(m); regclass_pyngraph_AxisSet(m); regclass_pyngraph_AxisVector(m); regclass_pyngraph_Coordinate(m); - regmodule_pyngraph_types(m); - regclass_pyngraph_Function(m); py::module m_op = m.def_submodule("op", "Package ngraph.impl.op that wraps ngraph::op"); regclass_pyngraph_op_Constant(m_op); regclass_pyngraph_op_Parameter(m_op); @@ -58,6 +57,7 @@ PYBIND11_MODULE(_pyngraph, m) regmodule_pyngraph_onnx_import(m); #endif regmodule_pyngraph_op_util(m_op); + regclass_pyngraph_Function(m); regmodule_pyngraph_passes(m); regmodule_pyngraph_util(m); regclass_pyngraph_Variant(m); diff --git a/ngraph/python/src/pyngraph/shape.cpp b/ngraph/python/src/pyngraph/shape.cpp index 57e89b64c96..9702387294e 100644 --- a/ngraph/python/src/pyngraph/shape.cpp +++ b/ngraph/python/src/pyngraph/shape.cpp @@ -18,9 +18,12 @@ void regclass_pyngraph_Shape(py::module m) { py::class_> shape(m, "Shape"); shape.doc() = "ngraph.impl.Shape wraps ngraph::Shape"; - shape.def(py::init&>()); - shape.def(py::init&>()); - shape.def(py::init()); + shape.def(py::init&>(), + py::arg("axis_lengths")); + shape.def(py::init&>(), + py::arg("axis_lengths")); + shape.def(py::init(), + py::arg("axis_lengths")); shape.def("__len__", [](const ngraph::Shape& v) { return v.size(); }); shape.def("__getitem__", [](const ngraph::Shape& v, int key) { return v[key]; }); diff --git a/ngraph/python/src/pyngraph/strides.cpp b/ngraph/python/src/pyngraph/strides.cpp index 21c4880d6a8..a33f1a19374 100644 --- a/ngraph/python/src/pyngraph/strides.cpp +++ b/ngraph/python/src/pyngraph/strides.cpp @@ -18,9 +18,12 @@ void regclass_pyngraph_Strides(py::module m) { py::class_> strides(m, "Strides"); strides.doc() = "ngraph.impl.Strides wraps ngraph::Strides"; - strides.def(py::init&>()); - strides.def(py::init&>()); - strides.def(py::init()); + strides.def(py::init&>(), + py::arg("axis_strides")); + strides.def(py::init&>(), + py::arg("axis_strides")); + strides.def(py::init(), + py::arg("axis_strides")); strides.def("__str__", [](const ngraph::Strides& self) -> std::string { std::stringstream stringstream; diff --git a/ngraph/python/src/pyngraph/variant.hpp b/ngraph/python/src/pyngraph/variant.hpp index ac1b5ddb138..1027abcae7d 100644 --- a/ngraph/python/src/pyngraph/variant.hpp +++ b/ngraph/python/src/pyngraph/variant.hpp @@ -52,8 +52,21 @@ extern void regclass_pyngraph_VariantWrapper(py::module m, std::string typestrin }); variant_wrapper.def("get", - (VT & (ngraph::VariantWrapper::*)()) & ngraph::VariantWrapper::get); - variant_wrapper.def("set", &ngraph::VariantWrapper::set); + (VT & (ngraph::VariantWrapper::*)()) & ngraph::VariantWrapper::get, + R"( + 
Returns + ---------- + get : Variant + Value of Variant. + )"); + variant_wrapper.def("set", + &ngraph::VariantWrapper::set, + R"( + Parameters + ---------- + set : str or int + Value to be set in Variant. + )"); variant_wrapper.def_property("value", (VT & (ngraph::VariantWrapper::*)()) & From 67e2a17db16ea51b118064eddf703da304e8c654 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 5 May 2021 14:08:07 +0300 Subject: [PATCH 45/73] Reuse existing cmake variables (#5511) --- cmake/dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 6cb15a077e9..ab5c703cd30 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -4,7 +4,7 @@ set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}") -if(CMAKE_CROSSCOMPILING AND CMAKE_HOST_SYSTEM_NAME MATCHES Linux AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") +if(CMAKE_CROSSCOMPILING AND LINUX AND X86_64) set(protoc_version "3.7.1") RESOLVE_DEPENDENCY(SYSTEM_PROTOC_ROOT From 98783b88ac2c90a0056a7aa4408e72ef37e83ff4 Mon Sep 17 00:00:00 2001 From: Evgeny Lazarev Date: Wed, 5 May 2021 14:24:05 +0300 Subject: [PATCH 46/73] Removed transformation which removes Const->Result sub-graphs (#5295) * Removed transformation which removes Const->Result sub-graphs * Removed one more MO transformation which removes Const->Result sub-graph during the front phase --- model-optimizer/automation/package_BOM.txt | 1 - .../back/SpecialNodesFinalization.py | 47 ----- model-optimizer/extensions/front/YOLO.py | 3 +- .../front/standalone_const_eraser.py | 30 --- .../extensions/front/tf/ObjectDetectionAPI.py | 3 +- .../front/tf/SSDToolboxDetectionOutput.py | 4 - .../back/SpecialNodesFinalization_test.py | 176 +----------------- 7 files changed, 3 insertions(+), 261 deletions(-) delete mode 100644 model-optimizer/extensions/front/standalone_const_eraser.py diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index 1225f24be27..3e6f79e9f10 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -363,7 +363,6 @@ extensions/front/Softplus_fusion.py extensions/front/softsign_replacer.py extensions/front/split_normalizer.py extensions/front/SqueezeNormalize.py -extensions/front/standalone_const_eraser.py extensions/front/sub.py extensions/front/Swish_fusion.py extensions/front/tf/__init__.py diff --git a/model-optimizer/extensions/back/SpecialNodesFinalization.py b/model-optimizer/extensions/back/SpecialNodesFinalization.py index 01e95cbede9..11079fecac0 100644 --- a/model-optimizer/extensions/back/SpecialNodesFinalization.py +++ b/model-optimizer/extensions/back/SpecialNodesFinalization.py @@ -103,53 +103,6 @@ class CreateConstNodesReplacement(BackReplacementPattern): ) -class RemoveConstToResult(BackReplacementPattern): - """ - Transformation looks for a constant sub-graph followed by Result operation. - If sub-graph is Const->data->Result -- then all three nodes are removed. - If there is more complex constant sub-graph -- then only Result node is removed. - If Result node has keep_output_port attribute True the node will not to be removed from graph but - the Result node will not to be saved to IR. Only port will be kept in IR. - - Currently IE is unable to handle such graph so this transformation is a work around for such case. - For instance, this case appears for Wide and Deep model. 
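Editor's note: the transformation being removed here targeted graphs like the one sketched below; after this change such constant outputs are preserved and handed to plugins as-is. The snippet relies on the same assumed ngraph factories as the earlier sketches and is purely illustrative.

import numpy as np
import ngraph as ng
from ngraph.impl import Function

# A Constant feeding a Result directly - the Const->data->Result pattern that
# RemoveConstToResult used to strip from the graph.
const_out = ng.constant(np.array([5.0], dtype=np.float32))
const_model = Function(const_out, [], "const_to_result")
print(const_model.get_output_size())  # 1: the constant value stays a model output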
- """ - enabled = True - force_clean_up = True - # TODO: remove this transformation once all plugins support constant value network. - # Do not run recursively since Const->Result sub-graph can be encountered in a body graph of Loop node - # and this sub-graph is needed to avoid dynamism created by Loop node - # in case using axis in output port map - run_not_recursively = True - - @staticmethod - def pattern(): - return dict( - nodes=[ - ('const_data', {'kind': 'data', 'value': lambda value: value is not None}), - ('result_node', {'type': 'Result', 'kind': 'op', - 'keep_output_port': lambda attr: not attr}), - ], - edges=[ - ('const_data', 'result_node') - ] - ) - - @staticmethod - def replace_pattern(graph: Graph, match: dict): - const_data_node = match['const_data'] - result_node = match['result_node'] - nodes_to_remove = [result_node.id] - - # in case only const data consumer that is the result node, remove the whole sub-graph - parent_node = result_node.in_port(0).get_source().node - if parent_node.soft_get('type') == 'Const' and len(parent_node.out_port(0).get_destinations()) == 1: - nodes_to_remove.append(parent_node.id) - nodes_to_remove.append(const_data_node.id) - - graph.remove_nodes_from(nodes_to_remove) - - class NormalizeTI(BackReplacementPattern): """ Transformation changes linking mechanism of TensorIterator outer graph with inner graph diff --git a/model-optimizer/extensions/front/YOLO.py b/model-optimizer/extensions/front/YOLO.py index 867e839fe3c..a78671d1dc1 100644 --- a/model-optimizer/extensions/front/YOLO.py +++ b/model-optimizer/extensions/front/YOLO.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from extensions.front.no_op_eraser import NoOpEraser -from extensions.front.standalone_const_eraser import StandaloneConstEraser from extensions.ops.regionyolo import RegionYoloOp from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral from mo.graph.graph import Node, Graph @@ -18,7 +17,7 @@ class YoloRegionAddon(FrontReplacementFromConfigFileGeneral): replacement_id = 'TFYOLO' def run_after(self): - return [NoOpEraser, StandaloneConstEraser] + return [NoOpEraser] def transform_graph(self, graph: Graph, replacement_descriptions): op_outputs = [n for n, d in graph.nodes(data=True) if 'op' in d and d['op'] == 'Result'] diff --git a/model-optimizer/extensions/front/standalone_const_eraser.py b/model-optimizer/extensions/front/standalone_const_eraser.py deleted file mode 100644 index 7b01d34d57d..00000000000 --- a/model-optimizer/extensions/front/standalone_const_eraser.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging as log - -from mo.front.common.replacement import FrontReplacementSubgraph -from mo.graph.graph import Graph - - -class StandaloneConstEraser(FrontReplacementSubgraph): - enabled = True - # TODO: remove this transformation once all plugins support constant value network. 
- # Now it avoids to be run recursively since Const->Result sub-graph can be encountered in a body graph of Loop node - run_not_recursively = True - - @staticmethod - def pattern(): - return dict( - nodes=[('const', dict(kind='op', op='Const')), - ('output', dict(kind='op', op='Result')) - ], - edges=[('const', 'output')] - ) - - @staticmethod - def replace_sub_graph(graph: Graph, match: dict): - if not len(match['const'].in_edges()) and len(match['const'].out_edges()) == 1: - graph.erase_node(match['const']) - graph.erase_node(match['output']) - log.info("Standalone Const node \"{}\" was removed from the graph".format(match['const'].id)) diff --git a/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py b/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py index 449b9e57983..ecc9ff04290 100644 --- a/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py +++ b/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py @@ -9,7 +9,6 @@ import numpy as np from extensions.front.Pack import Pack from extensions.front.TransposeOrderNormalizer import TransposeOrderNormalizer from extensions.front.split_normalizer import SqueezeAxis -from extensions.front.standalone_const_eraser import StandaloneConstEraser from extensions.front.tf.CropAndResizeReplacement import CropAndResizeReplacement from extensions.front.tf.FakeQuantWithMinMaxVars import FakeQuantWithMinMaxVarsToQuantize from extensions.front.tf.KerasRNNTransformation import KerasRNNInputSlicing, KerasRNNOutputConcatenation @@ -529,7 +528,7 @@ class ObjectDetectionAPITransformationsFinish(FrontReplacementPattern): # But the inputs corresponding to padding values is re-used as inputs for newly created Pad node. This input # is removed during removing nodes from the DO sub-graph so the first input to Transpose is missing which # results in TransposeOrderNormalizer transformation failure. 
- return [Pack, TransposeOrderNormalizer, PadTFToPad, SqueezeAxis, StandaloneConstEraser, TFSliceToSliceReplacer, + return [Pack, TransposeOrderNormalizer, PadTFToPad, SqueezeAxis, TFSliceToSliceReplacer, KerasRNNOutputConcatenation, KerasRNNInputSlicing] def find_and_replace_pattern(self, graph: Graph): diff --git a/model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py b/model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py index af24927e0bf..ac5259c884a 100644 --- a/model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py +++ b/model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py @@ -1,7 +1,6 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from extensions.front.standalone_const_eraser import StandaloneConstEraser from extensions.ops.DetectionOutput import DetectionOutput from mo.front.common.partial_infer.utils import int64_array from mo.front.subgraph_matcher import SubgraphMatch @@ -16,9 +15,6 @@ from mo.ops.result import Result class SSDToolboxDetectionOutputReplacement(FrontReplacementFromConfigFileSubGraph): replacement_id = 'SSDToolboxDetectionOutput' - def run_before(self): - return [StandaloneConstEraser] - def nodes_to_remove(self, graph: Graph, match: SubgraphMatch): return [] diff --git a/model-optimizer/unit_tests/extensions/back/SpecialNodesFinalization_test.py b/model-optimizer/unit_tests/extensions/back/SpecialNodesFinalization_test.py index efcc9042b30..3fea16eec08 100644 --- a/model-optimizer/unit_tests/extensions/back/SpecialNodesFinalization_test.py +++ b/model-optimizer/unit_tests/extensions/back/SpecialNodesFinalization_test.py @@ -5,7 +5,7 @@ import unittest import numpy as np -from extensions.back.SpecialNodesFinalization import CreateConstNodesReplacement, RemoveConstToResult +from extensions.back.SpecialNodesFinalization import CreateConstNodesReplacement from mo.utils.ir_engine.compare_graphs import compare_graphs from unit_tests.utils.graph import build_graph_with_attrs @@ -99,177 +99,3 @@ class CreateConstNodesReplacementTest(unittest.TestCase): tested_pattern.find_and_replace_pattern(graph) (flag, resp) = compare_graphs(graph, graph_ref, last_node='next_node') self.assertTrue(flag, resp) - - -class RemoveConstToResultReplacementTest(unittest.TestCase): - def test_only_consumer(self): - """Result node is only consumer of Const data node""" - nodes = [ - ('const_node', {'type': 'Const', 'kind': 'op'}), - ('const_data', {'kind': 'data', 'value': np.array(5)}), - ('result_node', {'type': 'Result', 'kind': 'op', 'keep_output_port': False}), - - ('placeholder_1', {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}), - ('placeholder_1_data', {'kind': 'data'}), - ('relu_1', {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'}), - ('relu_1_data', {'kind': 'data'}), - ] - edges = [ - ('const_node', 'const_data'), - ('const_data', 'result_node'), - - ('placeholder_1', 'placeholder_1_data'), - ('placeholder_1_data', 'relu_1'), - ('relu_1', 'relu_1_data') - ] - new_nodes=[ - ('placeholder_1', {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}), - ('placeholder_1_data', {'kind': 'data'}), - ('relu_1', {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'}), - ('relu_1_data', {'kind': 'data'}), - ] - new_edges=[ - ('placeholder_1', 'placeholder_1_data'), - ('placeholder_1_data', 'relu_1'), - ('relu_1', 'relu_1_data') - ] - - graph = build_graph_with_attrs( - nodes_with_attrs=nodes, - edges_with_attrs=edges, - ) - graph_ref = build_graph_with_attrs( - nodes_with_attrs=new_nodes, - 
edges_with_attrs=new_edges, - ) - tested_pattern = RemoveConstToResult() - tested_pattern.find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, last_node='relu_1_data') - self.assertTrue(flag, resp) - self.assertNotIn('const_node', graph.node) - self.assertNotIn('const_data', graph.node) - self.assertNotIn('result_node', graph.node) - - - def test_only_consumer_keep_result(self): - """Result node is only consumer of Const data node""" - nodes = [ - ('placeholder_1', {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}), - ('placeholder_1_data', {'kind': 'data'}), - ('placeholder_2', {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}), - ('placeholder_2_data', {'kind': 'data'}), - ('shape_of', {'type': 'ShapeOf', 'kind': 'op', 'op': 'ShapeOf'}), - ('shape_of_data', {'kind': 'data'}), - ('split', {'type': 'Split', 'kind': 'op', 'op': 'Split'}), - ('split_data1', {'kind': 'data'}), - ('split_data2', {'kind': 'data'}), - ('result_node1', {'type': 'Result', 'kind': 'op', 'keep_output_port': True}), - - ('mul', {'type': 'Mul', 'kind': 'op', 'op': 'Mul'}), - ('mul_data', {'kind': 'data'}), - ('result_node2', {'type': 'Result', 'kind': 'op'}), - ] - edges = [ - ('placeholder_1', 'placeholder_1_data'), - ('placeholder_2', 'placeholder_2_data'), - ('placeholder_1_data', 'shape_of'), - ('shape_of', 'shape_of_data'), - ('shape_of_data', 'split'), - ('split', 'split_data1', {'in': 0}), - ('split', 'split_data2', {'in': 1}), - - ('split_data1', 'result_node1'), - ('split_data2', 'mul'), - ('placeholder_2_data', 'mul'), - ('mul', 'mul_data'), - ('mul_data', 'result_node2'), - ] - - graph = build_graph_with_attrs( - nodes_with_attrs=nodes, - edges_with_attrs=edges, - ) - graph_ref = build_graph_with_attrs( - nodes_with_attrs=nodes, - edges_with_attrs=edges, - ) - tested_pattern = RemoveConstToResult() - tested_pattern.find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, last_node='mul_data') - self.assertTrue(flag, resp) - self.assertIn('split_data1', graph.node) - self.assertIn('split_data2', graph.node) - self.assertIn('result_node1', graph.node) - - - def test_two_consumers(self): - """Const data node has two consumers: Result and ReLu""" - nodes = [ - ('const_node', {'type': 'Const', 'kind': 'op'}), - ('const_data', {'kind': 'data', 'value': np.array(5)}), - ('result_node', {'type': 'Result', 'kind': 'op'}), - ('relu_1', {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'}), - ('relu_1_data', {'kind': 'data'}), - ] - edges = [ - ('const_node', 'const_data'), - ('const_data', 'result_node'), - ('const_data', 'relu_1'), - ('relu_1', 'relu_1_data') - ] - new_nodes=[ - ('const_node', {'type': 'Const', 'kind': 'op'}), - ('const_data', {'kind': 'data'}), - ('relu_1', {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'}), - ('relu_1_data', {'kind': 'data'}), - ] - new_edges=[ - ('const_node', 'const_data'), - ('const_data', 'relu_1'), - ('relu_1', 'relu_1_data') - ] - - graph = build_graph_with_attrs( - nodes_with_attrs=nodes, - edges_with_attrs=edges, - ) - graph_ref = build_graph_with_attrs( - nodes_with_attrs=new_nodes, - edges_with_attrs=new_edges, - ) - tested_pattern = RemoveConstToResult() - tested_pattern.find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, last_node='relu_1_data') - self.assertTrue(flag, resp) - self.assertNotIn('result_node', graph.node) - - - def test_two_consumers_keep_outputs(self): - """Const data node has two consumers: Result and ReLu""" - nodes = [ - ('const_node', {'type': 'Const', 'kind': 'op'}), - 
('const_data', {'kind': 'data', 'value': np.array(5)}), - ('result_node', {'type': 'Result', 'kind': 'op', 'keep_output_port': True}), - ('relu_1', {'type': 'ReLU', 'kind': 'op', 'op': 'ReLU'}), - ('relu_1_data', {'kind': 'data'}), - ] - edges = [ - ('const_node', 'const_data'), - ('const_data', 'result_node'), - ('const_data', 'relu_1'), - ('relu_1', 'relu_1_data') - ] - - graph = build_graph_with_attrs( - nodes_with_attrs=nodes, - edges_with_attrs=edges, - ) - graph_ref = build_graph_with_attrs( - nodes_with_attrs=nodes, - edges_with_attrs=edges, - ) - tested_pattern = RemoveConstToResult() - tested_pattern.find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, graph_ref, last_node='relu_1_data') - self.assertTrue(flag, resp) - self.assertIn('result_node', graph.node) From dc49035eaaa59d07b6a1b8ad1ca752847e968e36 Mon Sep 17 00:00:00 2001 From: Patryk Elszkowski Date: Wed, 5 May 2021 13:27:51 +0200 Subject: [PATCH 47/73] update tanh v1 spec (#5413) * update tanh v1 spec * Apply review suggestions * add rounding info * Move Tanh to activation functions * reorganize spec and add rounding rule for integers * back to arithemtic functions * Update `short description` to adjust with new EW description template Co-authored-by: Patryk Elszkowski --- docs/ops/arithmetic/Tanh_1.md | 54 ++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/docs/ops/arithmetic/Tanh_1.md b/docs/ops/arithmetic/Tanh_1.md index d50fe2fdade..c5c77dbe0ab 100644 --- a/docs/ops/arithmetic/Tanh_1.md +++ b/docs/ops/arithmetic/Tanh_1.md @@ -2,24 +2,50 @@ **Versioned name**: *Tanh-1* -**Category**: *Activation function* +**Category**: *Arithmetic function* -**Short description**: Tanh element-wise activation function. - -**Attributes**: has no attributes - -**Inputs**: - -* **1**: Input tensor x of any floating point type. Required. - -**Outputs**: - -* **1**: Result of Tanh function applied to the input tensor *x*. Floating point tensor with shape and type matching the input tensor. +**Short description**: *Tanh* performs element-wise hyperbolic tangent (tanh) operation with given tensor. **Detailed description** -For each element from the input tensor calculates corresponding -element in the output tensor with the following formula: +For each element from the input tensor calculates corresponding element in the output tensor with the following formula: \f[ tanh ( x ) = \frac{2}{1+e^{-2x}} - 1 = 2sigmoid(2x) - 1 \f] + +* For integer element type the result is rounded (half up) to the nearest integer value. + +**Attributes**: *Tanh* operation has no attributes. + +**Inputs**: + +* **1**: A tensor of type *T* and arbitrary shape. **Required.** + +**Outputs**: + +* **1**: The result of element-wise *Tanh* operation. A tensor of type *T* and the same shape as input tensor. +**Types** + +* *T*: any numeric type. + + +**Examples** + +*Example 1* + +```xml + + + + 256 + 56 + + + + + 256 + 56 + + + +``` From c362b3e105fcf1ca6ea71db3b71b841234ed13a7 Mon Sep 17 00:00:00 2001 From: Tomasz Socha Date: Wed, 5 May 2021 13:34:26 +0200 Subject: [PATCH 48/73] [ONNX] Fix missmatches for opset13 *Max operators. 
(#5443) --- .../frontend/onnx_import/src/op/hardmax.cpp | 56 +++++++++++++++---- .../frontend/onnx_import/src/op/hardmax.hpp | 6 +- .../onnx_import/src/op/log_softmax.cpp | 7 ++- .../frontend/onnx_import/src/op/softmax.cpp | 13 +++++ .../frontend/onnx_import/src/op/softmax.hpp | 6 ++ .../frontend/onnx_import/src/ops_bridge.cpp | 2 + ngraph/python/tests/__init__.py | 1 - ngraph/python/tests/test_onnx/test_backend.py | 8 --- .../python/tests/test_onnx/test_ops_unary.py | 28 +++++----- 9 files changed, 90 insertions(+), 37 deletions(-) diff --git a/ngraph/frontend/onnx_import/src/op/hardmax.cpp b/ngraph/frontend/onnx_import/src/op/hardmax.cpp index 9aa95d9ff55..edd47d1f444 100644 --- a/ngraph/frontend/onnx_import/src/op/hardmax.cpp +++ b/ngraph/frontend/onnx_import/src/op/hardmax.cpp @@ -60,22 +60,54 @@ namespace ngraph const auto converted_results = std::make_shared(results, input.get_element_type()); - if (input_shape.is_static()) - { - return {ngraph::builder::opset1::reshape(converted_results, - input_shape.to_shape())}; - } - else - { - const auto output_shape = std::make_shared(input); - return { - std::make_shared(input, output_shape, false)}; - } + const auto output_shape = std::make_shared(input); + return {std::make_shared( + converted_results, output_shape, false)}; } } // namespace set_1 + namespace set_13 + { + OutputVector hardmax(const Node& node) + { + const auto input = node.get_ng_inputs().at(0); + const auto& input_shape = input.get_partial_shape(); - } // namespace op + auto axis = node.get_attribute_value("axis", -1); + axis = ngraph::normalize_axis(node.get_description(), axis, input_shape.rank()); + + const auto input_runtime_shape = + std::make_shared(input); + Output row_size = std::make_shared( + input_runtime_shape, + default_opset::Constant::create(element::i64, {1}, {axis}), + default_opset::Constant::create(element::i64, {}, {0})); + row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size); + + const auto topk = std::make_shared( + input, + default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), + axis, + default_opset::TopK::Mode::MAX, + default_opset::TopK::SortType::NONE); + + const auto on_value = + default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); + const auto off_value = + default_opset::Constant::create(ngraph::element::i64, Shape{}, {0}); + + const auto results = std::make_shared( + topk->output(1), row_size, on_value, off_value, axis); + const auto converted_results = + std::make_shared(results, input.get_element_type()); + + const auto output_shape = std::make_shared(input); + return {std::make_shared( + converted_results, output_shape, false)}; + } + + } // namespace set_13 + } // namespace op } // namespace onnx_import diff --git a/ngraph/frontend/onnx_import/src/op/hardmax.hpp b/ngraph/frontend/onnx_import/src/op/hardmax.hpp index bbf16ba21de..10cc6592774 100644 --- a/ngraph/frontend/onnx_import/src/op/hardmax.hpp +++ b/ngraph/frontend/onnx_import/src/op/hardmax.hpp @@ -18,7 +18,11 @@ namespace ngraph OutputVector hardmax(const Node& node); } // namespace set_1 - } // namespace op + namespace set_13 + { + OutputVector hardmax(const Node& node); + } // namespace set_13 + } // namespace op } // namespace onnx_import diff --git a/ngraph/frontend/onnx_import/src/op/log_softmax.cpp b/ngraph/frontend/onnx_import/src/op/log_softmax.cpp index b44e8dcfcf4..84ad751a26f 100644 --- a/ngraph/frontend/onnx_import/src/op/log_softmax.cpp +++ b/ngraph/frontend/onnx_import/src/op/log_softmax.cpp @@ -73,7 +73,12 @@ 
namespace ngraph namespace set_13 { - OutputVector log_softmax(const Node& node) { return detail::log_softmax(node, -1); } + OutputVector log_softmax(const Node& node) + { + const auto axis = node.get_attribute_value("axis", -1); + return { + std::make_shared(node.get_ng_inputs()[0], axis)}; + } } // namespace set_13 } // namespace op diff --git a/ngraph/frontend/onnx_import/src/op/softmax.cpp b/ngraph/frontend/onnx_import/src/op/softmax.cpp index 24b0e9722ce..1827799b8e3 100644 --- a/ngraph/frontend/onnx_import/src/op/softmax.cpp +++ b/ngraph/frontend/onnx_import/src/op/softmax.cpp @@ -110,6 +110,19 @@ namespace ngraph return {result}; } } // namespace set_7 + namespace set_13 + { + OutputVector softmax(const Node& node) + { + const auto data = node.get_ng_inputs().at(0); + + const auto axis = node.get_attribute_value("axis", -1); + const auto normalized_axis = ngraph::normalize_axis( + node.get_description(), axis, data.get_partial_shape().rank()); + + return {std::make_shared(data, normalized_axis)}; + } + } // namespace set_13 } // namespace op } // namespace onnx_import } // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/op/softmax.hpp b/ngraph/frontend/onnx_import/src/op/softmax.hpp index bf782bd91c6..5528f3dd259 100644 --- a/ngraph/frontend/onnx_import/src/op/softmax.hpp +++ b/ngraph/frontend/onnx_import/src/op/softmax.hpp @@ -24,6 +24,12 @@ namespace ngraph OutputVector softmax(const Node& node); } // namespace set_7 + + namespace set_13 + { + OutputVector softmax(const Node& node); + + } // namespace set_13 } // namespace op } // namespace onnx_import diff --git a/ngraph/frontend/onnx_import/src/ops_bridge.cpp b/ngraph/frontend/onnx_import/src/ops_bridge.cpp index 0d74e8d79ae..42774e93be2 100644 --- a/ngraph/frontend/onnx_import/src/ops_bridge.cpp +++ b/ngraph/frontend/onnx_import/src/ops_bridge.cpp @@ -360,6 +360,7 @@ namespace ngraph REGISTER_OPERATOR("Greater", 1, greater); REGISTER_OPERATOR("GRU", 1, gru); REGISTER_OPERATOR("Hardmax", 1, hardmax); + REGISTER_OPERATOR("Hardmax", 13, hardmax); REGISTER_OPERATOR("HardSigmoid", 1, hard_sigmoid); REGISTER_OPERATOR("Identity", 1, identity); REGISTER_OPERATOR("ImageScaler", 1, image_scaler); @@ -438,6 +439,7 @@ namespace ngraph // Softmax v7 should be in the 11th opset but, // other frameworks(mxnet and onnxruntime) already use for older models. 
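The separate opset-13 registrations added in this file exist because ONNX changed the meaning of `axis` for the Softmax-family operators: up to opset 11 the input is coerced to 2-D and normalized over the whole flattened trailing block, while from opset 13 normalization happens along a single axis (default -1). A small numpy sketch of that difference — illustrative only, not OpenVINO code:

```python
import numpy as np

def softmax_coerce_2d(x, axis):       # opset <= 11 style behaviour
    flat = x.reshape(int(np.prod(x.shape[:axis], dtype=np.int64)), -1)
    e = np.exp(flat - flat.max(axis=1, keepdims=True))
    return (e / e.sum(axis=1, keepdims=True)).reshape(x.shape)

def softmax_single_axis(x, axis):     # opset >= 13 style behaviour
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

x = np.random.rand(3, 4, 5).astype(np.float32)
# The two definitions disagree for rank > 2 inputs, hence the distinct opset-13 entry.
print(np.allclose(softmax_coerce_2d(x, 1), softmax_single_axis(x, 1)))  # False
```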
REGISTER_OPERATOR("Softmax", 7, softmax); + REGISTER_OPERATOR("Softmax", 13, softmax); REGISTER_OPERATOR("Softplus", 1, softplus); REGISTER_OPERATOR("Softsign", 1, softsign); REGISTER_OPERATOR("SpaceToDepth", 1, space_to_depth); diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index e579d02dfa0..8fee2c70faf 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -103,7 +103,6 @@ xfail_issue_38735 = xfail_test(reason="RuntimeError: nGraph does not support the "ai.onnx.preview.training.Adagrad") xfail_issue_48052 = xfail_test(reason="Dropout op is not supported in traning mode") xfail_issue_45180 = xfail_test(reason="RuntimeError: Unsupported dynamic op: ReduceSum") -xfail_issue_44839 = xfail_test(reason="Huge computation missmatch") xfail_issue_44848 = xfail_test(reason="E Unsupported dynamic op: Range") xfail_issue_44851 = xfail_test(reason="E Unsupported dynamic op: Broadcast") xfail_issue_44854 = xfail_test(reason="E Unsupported dynamic op: VariadicSplit") diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index e508551b2c0..3b91a18344f 100644 --- a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -38,7 +38,6 @@ from tests import (BACKEND_NAME, xfail_issue_39659, xfail_issue_39662, xfail_issue_43742, - xfail_issue_44839, xfail_issue_44848, xfail_issue_44851, xfail_issue_44854, @@ -336,13 +335,6 @@ tests_expected_to_fail = [ (xfail_issue_43742, "OnnxBackendNodeModelTest.test_if_cpu", "OnnxBackendNodeModelTest.test_if_seq_cpu"), - (xfail_issue_44839, - "OnnxBackendNodeModelTest.test_logsoftmax_axis_0_cpu", - "OnnxBackendNodeModelTest.test_logsoftmax_axis_1_cpu", - "OnnxBackendNodeModelTest.test_softmax_default_axis_cpu", - "OnnxBackendNodeModelTest.test_hardmax_axis_0_cpu", - "OnnxBackendNodeModelTest.test_hardmax_axis_1_cpu", - "OnnxBackendNodeModelTest.test_hardmax_default_axis_cpu",), (xfail_issue_44848, "OnnxBackendNodeModelTest.test_range_float_type_positive_delta_cpu", "OnnxBackendNodeModelTest.test_range_int32_type_negative_delta_cpu",), diff --git a/ngraph/python/tests/test_onnx/test_ops_unary.py b/ngraph/python/tests/test_onnx/test_ops_unary.py index 582749264a6..01c9eeb9f55 100644 --- a/ngraph/python/tests/test_onnx/test_ops_unary.py +++ b/ngraph/python/tests/test_onnx/test_ops_unary.py @@ -175,7 +175,7 @@ def test_hardmax(axis, dim1, dim2): data = np.random.rand(3, 4, 5).astype(np.float32) expected = hardmax_2d(data.reshape(dim1, dim2)).reshape(3, 4, 5) node = onnx.helper.make_node("Hardmax", inputs=["x"], outputs=["y"], axis=axis) - ng_results = run_node(node, [data]) + ng_results = run_node(node, [data], opset_version=12) assert np.allclose(ng_results, [expected]) @@ -189,24 +189,24 @@ def test_hardmax_special_cases(): # default axis=1 expected = hardmax_2d(data.reshape(3, 20)).reshape(3, 4, 5) node = onnx.helper.make_node("Hardmax", inputs=["x"], outputs=["y"]) - ng_results = run_node(node, [data]) + ng_results = run_node(node, [data], opset_version=12) assert np.allclose(ng_results, [expected]) expected = hardmax_2d(data.reshape(12, 5)).reshape(3, 4, 5) node = onnx.helper.make_node("Hardmax", inputs=["x"], outputs=["y"], axis=-1) - ng_results = run_node(node, [data]) + ng_results = run_node(node, [data], opset_version=12) assert np.allclose(ng_results, [expected]) with pytest.raises(RuntimeError): node = onnx.helper.make_node("Hardmax", inputs=["x"], outputs=["y"], axis=3) - ng_results = run_node(node, [data]) 
+ ng_results = run_node(node, [data], opset_version=12) # For multiple occurrences of the maximal values, the first occurrence is selected # for one-hot output data = np.array([[3, 3, 3, 1]]).astype(np.float32) expected = np.array([[1, 0, 0, 0]]).astype(np.float32) node = onnx.helper.make_node("Hardmax", inputs=["x"], outputs=["y"]) - ng_results = run_node(node, [data]) + ng_results = run_node(node, [data], opset_version=12) assert np.allclose(ng_results, [expected]) @@ -241,27 +241,27 @@ def test_logsoftmax(): node = onnx.helper.make_node("LogSoftmax", inputs=["x"], outputs=["y"], axis=0) expected = logsoftmax_2d(data.reshape(1, 60)).reshape(3, 4, 5) - ng_results = run_node(node, [data]) + ng_results = run_node(node, [data], opset_version=12) assert np.allclose(ng_results, [expected]) node = onnx.helper.make_node("LogSoftmax", inputs=["x"], outputs=["y"], axis=1) expected = logsoftmax_2d(data.reshape(3, 20)).reshape(3, 4, 5) - ng_results = run_node(node, [data]) + ng_results = run_node(node, [data], opset_version=12) + assert np.allclose(ng_results, [expected]) + + # default axis is 1 + node = onnx.helper.make_node("LogSoftmax", inputs=["x"], outputs=["y"]) + ng_results = run_node(node, [data], opset_version=12) assert np.allclose(ng_results, [expected]) node = onnx.helper.make_node("LogSoftmax", inputs=["x"], outputs=["y"], axis=2) expected = logsoftmax_2d(data.reshape(12, 5)).reshape(3, 4, 5) - ng_results = run_node(node, [data]) - assert np.allclose(ng_results, [expected]) - - # default axis is -1 - node = onnx.helper.make_node("LogSoftmax", inputs=["x"], outputs=["y"]) - ng_results = run_node(node, [data]) + ng_results = run_node(node, [data], opset_version=12) assert np.allclose(ng_results, [expected]) with pytest.raises(RuntimeError): node = onnx.helper.make_node("LogSoftmax", inputs=["x"], outputs=["y"], axis=3) - ng_results = run_node(node, [data]) + ng_results = run_node(node, [data], opset_version=12) def test_softplus(): From 1c2eaca49e9bf6128b2d2359833f2ab768f95a1e Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 5 May 2021 16:25:11 +0300 Subject: [PATCH 49/73] Fixed memory leak in IR reader (#5507) --- inference-engine/src/readers/ir_reader/ie_ir_parser.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index 058a856a765..e3467ba01e4 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -920,10 +920,11 @@ std::shared_ptr V10Parser::parse( XmlDeserializer visitor(root, weights, opsets, variables); bool use_framework_node{false}; for (const auto & ext : _exts) { - InferenceEngine::Version * version = new InferenceEngine::Version(); - ext->GetVersion(const_cast(version)); - if (version->description && strcmp(version->description, "framework_node_ext") == 0) { + const InferenceEngine::Version * version = nullptr; + ext->GetVersion(version); + if (version && version->description && strcmp(version->description, "framework_node_ext") == 0) { use_framework_node = true; + break; } } visitor.use_framework_node(use_framework_node); From 34060eb5f853cd224d900014f63ef86e0c43a20f Mon Sep 17 00:00:00 2001 From: Patryk Elszkowski Date: Wed, 5 May 2021 15:47:25 +0200 Subject: [PATCH 50/73] add Constant v1 to verified operations (#5412) Co-authored-by: Patryk Elszkowski --- .../functional_test_utils/layer_tests_summary/utils/constants.py | 1 + 1 file changed, 1 
insertion(+) diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py index 6202322194d..4c8bf8885e5 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py @@ -18,6 +18,7 @@ VERIFIED_OP_REFERENCES = [ 'Concat-1', 'ConvertLike-1', 'Convolution-1', + 'Constant-1', 'DeformableConvolution-1', 'DetectionOutput-1', 'Divide-1', From 62b1f655c0ed44144a1c9baae0978ac40066f5b5 Mon Sep 17 00:00:00 2001 From: Vladimir Gavrilov Date: Wed, 5 May 2021 17:17:46 +0300 Subject: [PATCH 51/73] DFT and IDFT nGraph Python API (#5387) * Added nGraph Python API for operations DFT and IDFT. * Written tests for the DFT Python API. * Written tests for IDFT nGraph Python API. * Small fixes. * Started to add tests for the signal_size case. * Written tests for signal_size case of DFT. * Written tests for signal_size case of IDFT. * Some code style fixes in IDFT nGraph Python API tests. * Code style fixes in tests for DFT nGraph Python API. * Now DFT nGraph Python API tests are used numpy FFT ressults as expected results of tests. * Now IDFT nGraph Python API tests without signal_size are used numpy FFT result as input data. * Now IDFT nGraph Python API tests use numpy IFFT as expected results for signal_size cases. * Deleted redundant function. * Formatting fix. * Now test data for DFT and IDFT nGraph Python API are randomly generated. * Added seed initialization. --- ngraph/python/src/ngraph/__init__.py | 2 + ngraph/python/src/ngraph/opset7/__init__.py | 2 + ngraph/python/src/ngraph/opset7/ops.py | 42 +++++++ ngraph/python/tests/__init__.py | 2 + ngraph/python/tests/test_ngraph/test_dft.py | 119 ++++++++++++++++++ ngraph/python/tests/test_ngraph/test_idft.py | 121 +++++++++++++++++++ 6 files changed, 288 insertions(+) create mode 100644 ngraph/python/tests/test_ngraph/test_dft.py create mode 100644 ngraph/python/tests/test_ngraph/test_idft.py diff --git a/ngraph/python/src/ngraph/__init__.py b/ngraph/python/src/ngraph/__init__.py index c66e3ee81e0..0b276049d33 100644 --- a/ngraph/python/src/ngraph/__init__.py +++ b/ngraph/python/src/ngraph/__init__.py @@ -52,6 +52,7 @@ from ngraph.opset7 import deformable_convolution from ngraph.opset7 import deformable_psroi_pooling from ngraph.opset7 import depth_to_space from ngraph.opset7 import detection_output +from ngraph.opset7 import dft from ngraph.opset7 import divide from ngraph.opset7 import einsum from ngraph.opset7 import elu @@ -80,6 +81,7 @@ from ngraph.opset7 import gru_sequence from ngraph.opset7 import hard_sigmoid from ngraph.opset7 import hsigmoid from ngraph.opset7 import hswish +from ngraph.opset7 import idft from ngraph.opset7 import interpolate from ngraph.opset7 import less from ngraph.opset7 import less_equal diff --git a/ngraph/python/src/ngraph/opset7/__init__.py b/ngraph/python/src/ngraph/opset7/__init__.py index c1ded5f9ad4..4b7e715982b 100644 --- a/ngraph/python/src/ngraph/opset7/__init__.py +++ b/ngraph/python/src/ngraph/opset7/__init__.py @@ -37,6 +37,7 @@ from ngraph.opset1.ops import deformable_convolution from ngraph.opset1.ops import deformable_psroi_pooling from ngraph.opset1.ops import depth_to_space from ngraph.opset1.ops import detection_output +from ngraph.opset7.ops import dft from ngraph.opset1.ops import divide from 
ngraph.opset7.ops import einsum from ngraph.opset1.ops import elu @@ -65,6 +66,7 @@ from ngraph.opset5.ops import gru_sequence from ngraph.opset1.ops import hard_sigmoid from ngraph.opset5.ops import hsigmoid from ngraph.opset4.ops import hswish +from ngraph.opset7.ops import idft from ngraph.opset1.ops import interpolate from ngraph.opset1.ops import less from ngraph.opset1.ops import less_equal diff --git a/ngraph/python/src/ngraph/opset7/ops.py b/ngraph/python/src/ngraph/opset7/ops.py index 419ac419fa6..a2b0c26930f 100644 --- a/ngraph/python/src/ngraph/opset7/ops.py +++ b/ngraph/python/src/ngraph/opset7/ops.py @@ -101,3 +101,45 @@ def roll( inputs = as_nodes(data, shift, axes) return _get_node_factory_opset7().create("Roll", inputs) + + +@nameable_op +def dft( + data: NodeInput, + axes: NodeInput, + signal_size: Optional[NodeInput] = None, +) -> Node: + """Return a node which performs DFT operation. + + @param data: Tensor with transformed data. + @param axes: Tensor with axes to transform. + @param signal_size: Tensor specifying signal size with respect to axes from the input 'axes'. + @return: The new node which performs DFT operation on the input data tensor. + """ + if signal_size is None: + inputs = as_nodes(data, axes) + else: + inputs = as_nodes(data, axes, signal_size) + + return _get_node_factory_opset7().create("DFT", inputs) + + +@nameable_op +def idft( + data: NodeInput, + axes: NodeInput, + signal_size: Optional[NodeInput] = None, +) -> Node: + """Return a node which performs IDFT operation. + + @param data: Tensor with transformed data. + @param axes: Tensor with axes to transform. + @param signal_size: Tensor specifying signal size with respect to axes from the input 'axes'. + @return: The new node which performs IDFT operation on the input data tensor. 
+ """ + if signal_size is None: + inputs = as_nodes(data, axes) + else: + inputs = as_nodes(data, axes, signal_size) + + return _get_node_factory_opset7().create("IDFT", inputs) diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 8fee2c70faf..d97c735e970 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -159,4 +159,6 @@ xfail_issue_52463 = xfail_test(reason="test_operator_add_size1_singleton_broadca xfail_issue_49391 = xfail_test(reason="Roll is not implemented in CPU plugin.") +xfail_issue_49359 = xfail_test(reason="DFT is not implemented in CPU plugin") +xfail_issue_49375 = xfail_test(reason="IDFT is not implemented in CPU plugin") xfail_issue_45432 = xfail_test(reason="Einsum is not implemented in CPU plugin.") diff --git a/ngraph/python/tests/test_ngraph/test_dft.py b/ngraph/python/tests/test_ngraph/test_dft.py new file mode 100644 index 00000000000..465aa383567 --- /dev/null +++ b/ngraph/python/tests/test_ngraph/test_dft.py @@ -0,0 +1,119 @@ +import ngraph as ng +import numpy as np +from tests import xfail_issue_49359 +from tests.runtime import get_runtime + + +def build_fft_input_data(): + np.random.seed(202104) + return np.random.uniform(0, 1, (2, 10, 10, 2)).astype(np.float32) + + +@xfail_issue_49359 +def test_dft_1d(): + runtime = get_runtime() + input_data = build_fft_input_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([2], dtype=np.int64)) + + dft_node = ng.dft(input_tensor, input_axes) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.fft(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), axis=2) + expected_results = np_results.view(dtype=np.float32).reshape((2, 10, 10, 2)) + assert np.allclose(dft_results, expected_results, atol=0.00001) + + +@xfail_issue_49359 +def test_dft_2d(): + runtime = get_runtime() + input_data = build_fft_input_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([1, 2], dtype=np.int64)) + + dft_node = ng.dft(input_tensor, input_axes) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.fft2(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), axes=[1, 2]) + expected_results = np_results.view(dtype=np.float32).reshape((2, 10, 10, 2)) + assert np.allclose(dft_results, expected_results, atol=0.000062) + + +@xfail_issue_49359 +def test_dft_3d(): + runtime = get_runtime() + input_data = build_fft_input_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([0, 1, 2], dtype=np.int64)) + + dft_node = ng.dft(input_tensor, input_axes) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.fftn(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), axes=[0, 1, 2]) + expected_results = np_results.view(dtype=np.float32).reshape((2, 10, 10, 2)) + assert np.allclose(dft_results, expected_results, atol=0.0002) + + +@xfail_issue_49359 +def test_dft_1d_signal_size(): + runtime = get_runtime() + input_data = build_fft_input_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([-2], dtype=np.int64)) + input_signal_size = ng.constant(np.array([20], dtype=np.int64)) + + dft_node = ng.dft(input_tensor, input_axes, input_signal_size) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.fft(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), n=20, axis=-2) + expected_results = 
np_results.view(dtype=np.float32).reshape((2, 20, 10, 2)) + assert np.allclose(dft_results, expected_results, atol=0.00001) + + +@xfail_issue_49359 +def test_dft_2d_signal_size_1(): + runtime = get_runtime() + input_data = build_fft_input_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([0, 2], dtype=np.int64)) + input_signal_size = ng.constant(np.array([4, 5], dtype=np.int64)) + + dft_node = ng.dft(input_tensor, input_axes, input_signal_size) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.fft2(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), s=[4, 5], axes=[0, 2]) + expected_results = np_results.view(dtype=np.float32).reshape((4, 10, 5, 2)) + assert np.allclose(dft_results, expected_results, atol=0.000062) + + +@xfail_issue_49359 +def test_dft_2d_signal_size_2(): + runtime = get_runtime() + input_data = build_fft_input_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([1, 2], dtype=np.int64)) + input_signal_size = ng.constant(np.array([4, 5], dtype=np.int64)) + + dft_node = ng.dft(input_tensor, input_axes, input_signal_size) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.fft2(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), s=[4, 5], axes=[1, 2]) + expected_results = np_results.view(dtype=np.float32).reshape((2, 4, 5, 2)) + assert np.allclose(dft_results, expected_results, atol=0.000062) + + +@xfail_issue_49359 +def test_dft_3d_signal_size(): + runtime = get_runtime() + input_data = build_fft_input_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([0, 1, 2], dtype=np.int64)) + input_signal_size = ng.constant(np.array([4, 5, 16], dtype=np.int64)) + + dft_node = ng.dft(input_tensor, input_axes, input_signal_size) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.fftn(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), + s=[4, 5, 16], axes=[0, 1, 2]) + expected_results = np_results.view(dtype=np.float32).reshape((4, 5, 16, 2)) + assert np.allclose(dft_results, expected_results, atol=0.0002) diff --git a/ngraph/python/tests/test_ngraph/test_idft.py b/ngraph/python/tests/test_ngraph/test_idft.py new file mode 100644 index 00000000000..ac352707caa --- /dev/null +++ b/ngraph/python/tests/test_ngraph/test_idft.py @@ -0,0 +1,121 @@ +import ngraph as ng +import numpy as np +from tests import xfail_issue_49375 +from tests.runtime import get_runtime + + +def get_data(): + np.random.seed(202104) + return np.random.uniform(0, 1, (2, 10, 10, 2)).astype(np.float32) + + +@xfail_issue_49375 +def test_idft_1d(): + runtime = get_runtime() + expected_results = get_data() + complex_input_data = np.fft.fft(np.squeeze(expected_results.view(dtype=np.complex64), axis=-1), axis=2) + input_data = complex_input_data.view(dtype=np.float32).reshape((2, 10, 10, 2)) + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([2], dtype=np.int64)) + + dft_node = ng.idft(input_tensor, input_axes) + computation = runtime.computation(dft_node) + dft_results = computation() + assert np.allclose(dft_results, expected_results, atol=0.000002) + + +@xfail_issue_49375 +def test_idft_2d(): + runtime = get_runtime() + expected_results = get_data() + complex_input_data = np.fft.fft2(np.squeeze(expected_results.view(dtype=np.complex64), axis=-1), + axes=[1, 2]) + input_data = complex_input_data.view(dtype=np.float32).reshape((2, 10, 10, 2)) + 
input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([1, 2], dtype=np.int64)) + + dft_node = ng.idft(input_tensor, input_axes) + computation = runtime.computation(dft_node) + dft_results = computation() + assert np.allclose(dft_results, expected_results, atol=0.000002) + + +@xfail_issue_49375 +def test_idft_3d(): + runtime = get_runtime() + expected_results = get_data() + complex_input_data = np.fft.fft2(np.squeeze(expected_results.view(dtype=np.complex64), axis=-1), + axes=[0, 1, 2]) + input_data = complex_input_data.view(dtype=np.float32).reshape((2, 10, 10, 2)) + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([0, 1, 2], dtype=np.int64)) + + dft_node = ng.idft(input_tensor, input_axes) + computation = runtime.computation(dft_node) + dft_results = computation() + assert np.allclose(dft_results, expected_results, atol=0.000003) + + +@xfail_issue_49375 +def test_idft_1d_signal_size(): + runtime = get_runtime() + input_data = get_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([-2], dtype=np.int64)) + input_signal_size = ng.constant(np.array([20], dtype=np.int64)) + + dft_node = ng.idft(input_tensor, input_axes, input_signal_size) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.ifft(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), n=20, axis=-2) + expected_results = np_results.view(dtype=np.float32).reshape((2, 20, 10, 2)) + assert np.allclose(dft_results, expected_results, atol=0.000002) + + +@xfail_issue_49375 +def test_idft_2d_signal_size_1(): + runtime = get_runtime() + input_data = get_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([0, 2], dtype=np.int64)) + input_signal_size = ng.constant(np.array([4, 5], dtype=np.int64)) + + dft_node = ng.idft(input_tensor, input_axes, input_signal_size) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.ifft2(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), s=[4, 5], axes=[0, 2]) + expected_results = np_results.view(dtype=np.float32).reshape((4, 10, 5, 2)) + assert np.allclose(dft_results, expected_results, atol=0.000002) + + +@xfail_issue_49375 +def test_idft_2d_signal_size_2(): + runtime = get_runtime() + input_data = get_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([1, 2], dtype=np.int64)) + input_signal_size = ng.constant(np.array([4, 5], dtype=np.int64)) + + dft_node = ng.idft(input_tensor, input_axes, input_signal_size) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.fft2(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), s=[4, 5], axes=[1, 2]) + expected_results = np_results.view(dtype=np.float32).reshape((2, 4, 5, 2)) + assert np.allclose(dft_results, expected_results, atol=0.000002) + + +@xfail_issue_49375 +def test_idft_3d_signal_size(): + runtime = get_runtime() + input_data = get_data() + input_tensor = ng.constant(input_data) + input_axes = ng.constant(np.array([0, 1, 2], dtype=np.int64)) + input_signal_size = ng.constant(np.array([4, 5, 16], dtype=np.int64)) + + dft_node = ng.idft(input_tensor, input_axes, input_signal_size) + computation = runtime.computation(dft_node) + dft_results = computation() + np_results = np.fft.ifftn(np.squeeze(input_data.view(dtype=np.complex64), axis=-1), + s=[4, 5, 16], axes=[0, 1, 2]) + expected_results = np_results.view(dtype=np.float32).reshape((4, 5, 16, 2)) + assert 
np.allclose(dft_results, expected_results, atol=0.000002) From dde837c4eb6be3c90f9bacfec8c957fe1aca5383 Mon Sep 17 00:00:00 2001 From: Mikhail Nosov Date: Wed, 5 May 2021 19:20:38 +0300 Subject: [PATCH 52/73] Add InferenceEngine::Core::LoadNetwork(modelPath, deviceName) to API_Changes (#5514) --- docs/IE_DG/API_Changes.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/IE_DG/API_Changes.md b/docs/IE_DG/API_Changes.md index 372b2d320eb..a234471c13e 100644 --- a/docs/IE_DG/API_Changes.md +++ b/docs/IE_DG/API_Changes.md @@ -4,6 +4,10 @@ The sections below contain detailed list of changes made to the Inference Engine ## 2021.4 +### New API + +* InferenceEngine::Core::LoadNetwork(modelPath, deviceName, config) simplified API to read and load network in one call + ### Deprecated API * InferenceEngine::Parameter(const std::shared_ptr&) From 49a53854e2263657f982646f22116edb4b61b17a Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Thu, 6 May 2021 10:58:34 +0300 Subject: [PATCH 53/73] ConcatTransformation fix (#5482) * [LPT] ConcatTransformation: fixed naming of outputs after split * [LPT][TESTS] Concat with split tests: added verification of output names --- .../src/concat.cpp | 8 ++- .../src/split.cpp | 4 +- .../concat_with_split_transformation.cpp | 54 ++++++++++++++-- .../concat_with_split_transformation.cpp | 3 +- .../lpt_ngraph_functions/concat_function.hpp | 4 +- .../src/concat_function.cpp | 62 +++++++++++-------- 6 files changed, 99 insertions(+), 36 deletions(-) diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp index 02de081ec03..24cc5940c1b 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -414,7 +414,13 @@ void ConcatTransformation::addDequantizationLayers( const std::string originalName = layer->get_friendly_name(); const std::string newName = layer->get_friendly_name() + LayerTransformation::originalLayerPostfix; layer->set_friendly_name(newName); - source->set_friendly_name(originalName); + + // Split & VariadicSplit have other naming rules + if (is_type(layer) || is_type(layer)) { + source->set_friendly_name(originalName + "." + std::to_string(i)); + } else { + source->set_friendly_name(originalName); + } subgraph.layers[layer->get_friendly_name()] = layer; } } diff --git a/inference-engine/src/low_precision_transformations/src/split.cpp b/inference-engine/src/low_precision_transformations/src/split.cpp index 5f0dbaebef0..91a32f77868 100644 --- a/inference-engine/src/low_precision_transformations/src/split.cpp +++ b/inference-engine/src/low_precision_transformations/src/split.cpp @@ -110,10 +110,8 @@ void SplitTransformation::updateOutputs( std::shared_ptr result = context.function->get_output_op(i); std::shared_ptr outputNode = result->get_input_node_shared_ptr(0); if (outputNode.get() == lastNode.get()) { - std::ostringstream oss; - oss << i; originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); - lastNode->set_friendly_name(originalName + "." + oss.str()); + lastNode->set_friendly_name(originalName + "." 
+ std::to_string(i)); break; } } diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp index 76b137d7468..5f966576594 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp @@ -74,20 +74,23 @@ inline std::ostream& operator<<(std::ostream& out, const ConcatTransformationTes typedef std::tuple < ngraph::element::Type, - ConcatTransformationTestValues + ConcatTransformationTestValues, + bool // additional Convolution after Split > ConcatTransformationParams; class ConcatWithSplitTransformation : public LayerTransformation, public testing::WithParamInterface { public: void SetUp() override { const ngraph::element::Type precision = std::get<0>(GetParam()); - ConcatTransformationTestValues testValues = std::get<1>(GetParam()); + const ConcatTransformationTestValues testValues = std::get<1>(GetParam()); + const bool addConvolution = std::get<2>(GetParam()); actualFunction = ngraph::builder::subgraph::ConcatFunction::getOriginalWithSplitedIntermediate( precision, testValues.inputShape, testValues.actual.fakeQuantize1, - testValues.actual.fakeQuantize2); + testValues.actual.fakeQuantize2, + addConvolution); SimpleLowPrecisionTransformer transform; if (testValues.multiChannels) { @@ -107,6 +110,7 @@ public: testValues.result.dequantizationBefore1, testValues.result.dequantizationBefore2, testValues.result.precisionAfterOperation, + addConvolution, testValues.result.dequantizationOperations1, testValues.result.dequantizationOperations2); } @@ -114,11 +118,13 @@ public: static std::string getTestCaseName(testing::TestParamInfo obj) { const ngraph::element::Type precision = std::get<0>(obj.param); const ConcatTransformationTestValues testValues = std::get<1>(obj.param); + const bool addConvolution = std::get<2>(obj.param); std::ostringstream result; result << LayerTransformation::getTestCaseNameByParams(precision, testValues.inputShape, testValues.params) << "_" << (testValues.multiChannels ? "multiChannels_" : "notMultiChannels_") << + (addConvolution ? 
"" : "without_convolution_") << testValues.actual << "_" << testValues.result << "_"; return result.str(); @@ -127,7 +133,7 @@ public: TEST_P(ConcatWithSplitTransformation, CompareFunctions) { actualFunction->validate_nodes_and_infer_types(); - auto res = compare_functions(referenceFunction, actualFunction, true); + auto res = compare_functions(referenceFunction, actualFunction, true, true); ASSERT_TRUE(res.first) << res.second; } @@ -136,6 +142,7 @@ const std::vector precisions = { // ngraph::element::f16 }; +namespace casesWithConvolution { const std::vector testValues = { // U8: concat { @@ -298,6 +305,43 @@ INSTANTIATE_TEST_CASE_P( ConcatWithSplitTransformation, ::testing::Combine( ::testing::ValuesIn(precisions), - ::testing::ValuesIn(testValues)), + ::testing::ValuesIn(testValues), + ::testing::Values(true)), ConcatWithSplitTransformation::getTestCaseName); +} // namespace casesWithConvolution + +// test cases to check output names +namespace casesWithoutConvolution { +const std::vector testValues = { + { + { 1, 6, 10, 10 }, + LayerTransformation::createParamsU8I8(), + true, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {255.f}}, + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, { 255.f}}, + ngraph::element::u8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::u8, + { ngraph::element::f32, {}, {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} }, + { ngraph::element::f32, {}, { 0.005f } } + } + }, +}; + +INSTANTIATE_TEST_CASE_P( + smoke_LPT, + ConcatWithSplitTransformation, + ::testing::Combine( + ::testing::ValuesIn(precisions), + ::testing::ValuesIn(testValues), + ::testing::Values(false)), + ConcatWithSplitTransformation::getTestCaseName); +} // namespace casesWithoutConvolution + } // namespace diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_split_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_split_transformation.cpp index fca261f6b4f..3c67dd1b729 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_split_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_split_transformation.cpp @@ -63,7 +63,8 @@ void ConcatWithSplitTransformation::SetUp() { netPrecision, inputShapes, param.fqOnData1, - param.fqOnData2); + param.fqOnData2, + true); } TEST_P(ConcatWithSplitTransformation, CompareWithRefImpl) { diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp index 95da3db91ef..7fe001835db 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp @@ -53,7 +53,8 @@ public: const ngraph::element::Type precision, const ngraph::Shape& inputShape, const FakeQuantizeOnData& fqOnData1, - const FakeQuantizeOnData& fqOnData2); + const FakeQuantizeOnData& fqOnData2, + const bool addConvolution); static std::shared_ptr getOriginalSelectionWithIntermediate( const ngraph::element::Type precision, @@ -151,6 +152,7 @@ public: const 
DequantizationOperations& dequantizationBefore1, const DequantizationOperations& dequantizationBefore2, const ngraph::element::Type precisionAfterOperation, + const bool addConvolution, const DequantizationOperations& dequantizationOperations1, const DequantizationOperations& dequantizationOperations2); diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp index 8b251a4d9be..063825b1872 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp @@ -239,7 +239,8 @@ std::shared_ptr ConcatFunction::getOriginalWithSplitedIntermed const ngraph::element::Type precision, const ngraph::Shape& inputShape, const FakeQuantizeOnData& fqOnData1, - const FakeQuantizeOnData& fqOnData2) { + const FakeQuantizeOnData& fqOnData2, + const bool addConvolution) { size_t numSplit = 2; size_t splitedAxis = 1; @@ -272,24 +273,28 @@ std::shared_ptr ConcatFunction::getOriginalWithSplitedIntermed const std::shared_ptr concat = std::make_shared( ngraph::OutputVector{ fakeQuantize1->output(0), intermediateOp->output(0) }, splitedAxis); - concat->set_friendly_name("concat"); + concat->set_friendly_name("output_1"); auto& rtInfo = concat->get_rt_info(); rtInfo["Variant::std::string"] = std::make_shared>("concat"); - auto weights = ngraph::opset1::Constant::create(precision, ngraph::Shape{ inputShape[1] / numSplit, inputShape[1] / numSplit, 1, 1 }, { 1 }); - auto convolution = std::make_shared( - intermediateOp->output(1), - weights, - ngraph::Strides{ 1, 1 }, - ngraph::CoordinateDiff{ 0, 0 }, - ngraph::CoordinateDiff{ 0, 0 }, - ngraph::Strides{ 1, 1 }); - convolution->set_friendly_name("convolution"); + Output lastOutput = intermediateOp->output(1); + if (addConvolution) { + auto weights = ngraph::opset1::Constant::create(precision, ngraph::Shape{ inputShape[1] / numSplit, inputShape[1] / numSplit, 1, 1 }, { 1 }); + auto convolution = std::make_shared( + intermediateOp->output(1), + weights, + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }); + lastOutput = convolution->output(0); + } + lastOutput.get_node_shared_ptr()->set_friendly_name("output_2"); ngraph::ResultVector results{ std::make_shared(concat), - std::make_shared(convolution), + std::make_shared(lastOutput), }; std::shared_ptr function = std::make_shared( @@ -964,6 +969,7 @@ std::shared_ptr ConcatFunction::getReferenceWithSplitedInterme const DequantizationOperations& dequantizationBefore1, const DequantizationOperations& dequantizationBefore2, const ngraph::element::Type precisionAfterOperation, + const bool addConvolution, const DequantizationOperations& dequantizationOperations1, const DequantizationOperations& dequantizationOperations2) { size_t numSplit = 2; @@ -1005,7 +1011,6 @@ std::shared_ptr ConcatFunction::getReferenceWithSplitedInterme const auto constant = std::make_shared(element::i64, Shape{ }, splitedAxis); intermediateOp = std::make_shared(deqBefore2, constant, numSplit); - intermediateOp->set_friendly_name("intermediate"); const std::shared_ptr concat = std::make_shared( @@ -1017,23 +1022,30 @@ std::shared_ptr ConcatFunction::getReferenceWithSplitedInterme const auto lastDequantization1 = makeDequantization(concat, dequantizationOperations1); const auto lastDequantization2 = makeDequantization(intermediateOp->output(1), 
dequantizationOperations2); + lastDequantization1->set_friendly_name("output_1"); - auto weights = ngraph::opset1::Constant::create( - precision, - ngraph::Shape{ inputShape[1] / numSplit, inputShape[1] / numSplit, 1, 1 }, { 1 }); + Output lastOutput = lastDequantization2; + if (addConvolution) { + auto weights = ngraph::opset1::Constant::create( + precision, + ngraph::Shape{ inputShape[1] / numSplit, inputShape[1] / numSplit, 1, 1 }, { 1 }); - auto convolution = std::make_shared( - lastDequantization2, - weights, - ngraph::Strides{ 1, 1 }, - ngraph::CoordinateDiff{ 0, 0 }, - ngraph::CoordinateDiff{ 0, 0 }, - ngraph::Strides{ 1, 1 }); - convolution->set_friendly_name("convolution"); + auto convolution = std::make_shared( + lastDequantization2, + weights, + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }); + convolution->set_friendly_name("output_2"); + lastOutput = convolution->output(0); + } else { + lastOutput.get_node_shared_ptr()->set_friendly_name("output_2.1"); + } ngraph::ResultVector results{ std::make_shared(lastDequantization1), - std::make_shared(convolution) + std::make_shared(lastOutput) }; std::shared_ptr function = std::make_shared( From fa4a67ab250318e23f64b03470e1e415c3d9bfa2 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 6 May 2021 11:31:29 +0300 Subject: [PATCH 54/73] [IE CLDNN] Fixed FQ in byxf layout and pooling in fsv32 with int8 input (#5431) --- .../quantize_kernel_scale_shift_opt.cpp | 25 ++----------------- .../thirdparty/clDNN/src/gpu/pooling_gpu.cpp | 2 -- 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp index 14550c5f55a..c0dee93c4b6 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_scale_shift_opt.cpp @@ -20,29 +20,8 @@ ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const { k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::UINT8); k.EnableOutputDataType(Datatype::INT8); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableInputLayout(DataLayout::yxfb); - k.EnableInputLayout(DataLayout::byxf); - k.EnableInputLayout(DataLayout::bfzyx); - k.EnableInputLayout(DataLayout::bfwzyx); - k.EnableInputLayout(DataLayout::b_fs_yx_fsv16); - k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16); - k.EnableInputLayout(DataLayout::b_fs_yx_fsv4); - k.EnableInputLayout(DataLayout::b_fs_yx_fsv32); - k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32); - k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16); - k.EnableInputLayout(DataLayout::fs_b_yx_fsv32); - k.EnableOutputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::yxfb); - k.EnableOutputLayout(DataLayout::bfzyx); - k.EnableOutputLayout(DataLayout::bfwzyx); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); - k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32); - k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32); - k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16); - k.EnableOutputLayout(DataLayout::fs_b_yx_fsv32); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); k.EnableTensorOffset(); 
k.EnableTensorPitches(); k.EnableBatching(); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp index aec365a8d6b..138a235d6ce 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp @@ -211,8 +211,6 @@ attach_pooling_gpu::attach_pooling_gpu() { implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), pooling_gpu::create); implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), pooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fs_b_yx_fsv32), pooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_b_yx_fsv32), pooling_gpu::create); } } // namespace detail From 30b9d2ba13019471a56412970468a4d4822d3ada Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 6 May 2021 11:33:14 +0300 Subject: [PATCH 55/73] [IE CLDNN] Disabled vectorized ocl path for modes with bool output (#5521) --- .../eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp | 3 ++ .../eltwise/eltwise_kernel_base.cpp | 28 +++++++++++++++++++ .../eltwise/eltwise_kernel_base.h | 2 ++ .../eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp | 3 ++ .../eltwise/eltwise_kernel_vload8.cpp | 3 ++ .../tests/test_cases/eltwise_gpu_test.cpp | 4 +++ 6 files changed, 43 insertions(+) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp index 5ed8df0fcd2..afaa202ffe0 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp @@ -246,6 +246,9 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_param if (count % 8 != 0) return false; + if (IsUnsupportedModeForVecCode(params)) + return false; + for (size_t i = 0; i < params.inputs.size(); i++) { if ((params.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16) && (params.inputs[i].GetLayout() != DataLayout::b_fs_zyx_fsv16) && diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp index abf22356be8..6a40df57a68 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp @@ -108,6 +108,34 @@ bool EltwiseKernelBase::Validate(const Params& p, const optional_params& o) cons return true; } +bool EltwiseKernelBase::IsUnsupportedModeForVecCode(const eltwise_params& params) const { + // These modes are supposed to produce BOOL output type + // but this kernel uses vector data types, and these operation will produce 0xFFFF / 0x0000 instead of 0 / 1 values + // The value might be then converted to fp16/fp32 and used for some arithmetic, which will lead to invalid results, thus reject these modes + // to fallback on ref kernel with scalar types. 
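As an aside, a rough numpy analogy of the failure mode described in the comment above: a vectorized comparison yields an all-ones lane mask (0xFFFF, i.e. -1 for a signed lane) instead of 0/1, so converting that mask to fp16/fp32 and reusing it arithmetically produces -1.0 where 1.0 is expected. Illustrative only, not the kernel code:

```python
import numpy as np

a = np.array([1, 2, 3], dtype=np.int16)
b = np.array([1, 0, 3], dtype=np.int16)

ref  = (a == b).astype(np.int16)                     # scalar reference path: values 1, 0, 1
mask = np.where(a == b, np.int16(-1), np.int16(0))   # SIMD-style mask: values -1, 0, -1

print(ref.astype(np.float16))    # 1.0, 0.0, 1.0
print(mask.astype(np.float16))   # -1.0, 0.0, -1.0 -> wrong if fed into later arithmetic
```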
+ // TODO: Consider updating optimized kernels to produce 0/1 output for vector code if such operation is a bottleneck in some model + const std::vector unsupported_modes = { + EltwiseMode::EQ, + EltwiseMode::NE, + EltwiseMode::LT, + EltwiseMode::LE, + EltwiseMode::GT, + EltwiseMode::GE, + EltwiseMode::LOGIC_AND, + EltwiseMode::LOGIC_OR, + EltwiseMode::LOGIC_XOR, + EltwiseMode::FLOOR_MOD, + }; + + for (size_t op_num = 0; op_num < params.operations.size(); op_num++) { + const auto& ew = params.operations[op_num]; + if (std::find(unsupported_modes.begin(), unsupported_modes.end(), ew.mode) != unsupported_modes.end()) + return true; + } + + return false; +} + JitConstants EltwiseKernelBase::GetOperationsJitConstants(const eltwise_params& params, bool useVload8, size_t blockSize) const { JitConstants jit = {}; for (size_t op_num = 0; op_num < params.operations.size(); op_num++) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h index 4a340048f25..ee24f5c3bca 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h @@ -119,5 +119,7 @@ protected: virtual DispatchData SetDefault(const eltwise_params& params) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params& options) const; Datatype GetAccumulatorType(const eltwise_params ¶ms) const; + + bool IsUnsupportedModeForVecCode(const eltwise_params& params) const; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp index 4e8657a06a1..f8b43ea01a9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp @@ -58,6 +58,9 @@ bool EltwiseKernel_fs_b_yx_fsv32::Validate(const Params& params, const optional_ } } + if (IsUnsupportedModeForVecCode(ewParams)) + return false; + if (!bCheckSizes || !bSupportedCount || !bCheckUpdateInput || !bCheckUseOutput) { return false; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp index 8b9925bbf19..8ee06f60a54 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp @@ -73,6 +73,9 @@ bool EltwiseKernel_vload8::Validate(const Params& params, const optional_params& } } + if (IsUnsupportedModeForVecCode(ewParams)) + return false; + if (!bCheckSizes || !bSupportedCount || !bCheckUpdateInput || !bCheckUseOutput) { return false; } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp index eb70de8ef5f..436d2a2a359 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp +++ 
b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp @@ -41,6 +41,8 @@ T eltwise_execute(cldnn::eltwise_mode mode, T x, T y) { return std::pow((float)x, (float)y); case eltwise_mode::mod: return std::fmod((float)x, (float)y); + case eltwise_mode::eq: + return (float)((float)x == (float)y); default: return (T)0; } @@ -3540,6 +3542,7 @@ struct eltwise_layout_test_params { #define CASE_ELTWISE_TEST6 eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref" #define CASE_ELTWISE_TEST7 eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref" #define CASE_ELTWISE_TEST8 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref" +#define CASE_ELTWISE_TEST9 eltwise_mode::eq, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref" class eltwise_layout_test : public BaseEltwiseTest { }; @@ -3619,4 +3622,5 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_layout, eltwise_layout_test_params{CASE_ELTWISE_TEST6}, eltwise_layout_test_params{CASE_ELTWISE_TEST7}, eltwise_layout_test_params{CASE_ELTWISE_TEST8}, + eltwise_layout_test_params{CASE_ELTWISE_TEST9}, }), ); From 5834eef8f01de9abdab24eb0eb84b894b3dac31f Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov <32412802+just-sparta@users.noreply.github.com> Date: Thu, 6 May 2021 11:54:16 +0300 Subject: [PATCH 56/73] Change model_path to model_name in timeline report (#5457) --- .../scripts/memcheck-template/timeline_report.html | 2 +- tests/stress_tests/scripts/memcheck_upload.py | 6 +++--- tests/stress_tests/scripts/run_memcheck.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/stress_tests/scripts/memcheck-template/timeline_report.html b/tests/stress_tests/scripts/memcheck-template/timeline_report.html index d6f3132e584..f2accdb9f01 100644 --- a/tests/stress_tests/scripts/memcheck-template/timeline_report.html +++ b/tests/stress_tests/scripts/memcheck-template/timeline_report.html @@ -13,7 +13,7 @@

 Memcheck report
 {% for timeline in timelines %}
-{{ timeline['device'][0]|e }} {{ timeline['model'][0]|e }} {{ timeline['test_name'][0]|e }}
+{{ timeline['device'][0]|e }} {{ timeline['model_name'][0]|e }} {{ timeline['test_name'][0]|e }}
 {{ "PASS" if timeline['status'] else "FAIL" }}
diff --git a/tests/stress_tests/scripts/memcheck_upload.py b/tests/stress_tests/scripts/memcheck_upload.py index 43b217a1275..663a513c856 100644 --- a/tests/stress_tests/scripts/memcheck_upload.py +++ b/tests/stress_tests/scripts/memcheck_upload.py @@ -80,8 +80,8 @@ def info_from_test_config(test_conf): test_conf_root = test_conf_obj.getroot() records = {} for model_rec in test_conf_root.find("models"): - model = model_rec.attrib["path"] - records[Path(model)] = { + model_name = model_rec.attrib["name"] + records[model_name] = { "framework": model_rec.attrib.get("framework"), "source": model_rec.attrib.get("source"), } @@ -235,7 +235,7 @@ def create_memcheck_report(records, db_url, db_collection, output_path): """ Create memcheck timeline HTML report for records. """ records.sort( - key=lambda item: f"{item['status']}{item['device']}{item['model']}{item['test_name']}") + key=lambda item: f"{item['status']}{item['device']}{item['model_name']}{item['test_name']}") timelines = query_timeline(records, db_url, db_collection) import jinja2 # pylint: disable=import-outside-toplevel env = jinja2.Environment( diff --git a/tests/stress_tests/scripts/run_memcheck.py b/tests/stress_tests/scripts/run_memcheck.py index 01deaa92723..b8f32b4c589 100755 --- a/tests/stress_tests/scripts/run_memcheck.py +++ b/tests/stress_tests/scripts/run_memcheck.py @@ -177,7 +177,7 @@ def main(): if test_conf: info = info_from_test_config(test_conf) for record in records: - record.update(info.get(Path(record["model"]), {})) + record.update(info.get(record["model_name"], {})) # upload if args.upload: From 935405ad2d7e6c8b91b638a4513fa150581b28dd Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Thu, 6 May 2021 12:56:09 +0300 Subject: [PATCH 57/73] [IE TESTS] Remove dummy file for beh tests (#5436) --- .../tests_deprecated/behavior/CMakeLists.txt | 12 ------- .../behavior/cldnn/CMakeLists.txt | 29 ----------------- .../tests_deprecated/behavior/cldnn/dummy.cpp | 4 --- .../behavior/gna/CMakeLists.txt | 29 ----------------- .../tests_deprecated/behavior/gna/dummy.cpp | 4 --- .../behavior/mkldnn/CMakeLists.txt | 31 ------------------- .../behavior/mkldnn/dummy.cpp | 4 --- 7 files changed, 113 deletions(-) delete mode 100644 inference-engine/tests_deprecated/behavior/cldnn/CMakeLists.txt delete mode 100644 inference-engine/tests_deprecated/behavior/cldnn/dummy.cpp delete mode 100644 inference-engine/tests_deprecated/behavior/gna/CMakeLists.txt delete mode 100644 inference-engine/tests_deprecated/behavior/gna/dummy.cpp delete mode 100644 inference-engine/tests_deprecated/behavior/mkldnn/CMakeLists.txt delete mode 100644 inference-engine/tests_deprecated/behavior/mkldnn/dummy.cpp diff --git a/inference-engine/tests_deprecated/behavior/CMakeLists.txt b/inference-engine/tests_deprecated/behavior/CMakeLists.txt index 888b7b9a641..8b32bf5c19d 100644 --- a/inference-engine/tests_deprecated/behavior/CMakeLists.txt +++ b/inference-engine/tests_deprecated/behavior/CMakeLists.txt @@ -6,18 +6,6 @@ add_subdirectory(shared_tests) disable_deprecated_warnings() -if (ENABLE_MKL_DNN) - add_subdirectory(mkldnn) -endif() - -if (ENABLE_CLDNN) - add_subdirectory(cldnn) -endif() - -if (ENABLE_GNA) - add_subdirectory(gna) -endif() - if (ENABLE_HDDL OR ENABLE_MYRIAD) add_subdirectory(vpu) endif() \ No newline at end of file diff --git a/inference-engine/tests_deprecated/behavior/cldnn/CMakeLists.txt b/inference-engine/tests_deprecated/behavior/cldnn/CMakeLists.txt deleted file mode 100644 index 530746af62d..00000000000 --- 
a/inference-engine/tests_deprecated/behavior/cldnn/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set(TARGET_NAME ClDnnBehaviorTests) - -file(GLOB_RECURSE TEST_INCLUDE - ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) - -file(GLOB_RECURSE TEST_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -list(APPEND DEPENDENCIES - clDNNPlugin) - -source_group("src" FILES ${TEST_SRC}) -source_group("include" FILES ${TEST_INCLUDE}) - -add_executable(${TARGET_NAME} - ${TEST_SRC} - ${TEST_INCLUDE}) - -target_link_libraries(${TARGET_NAME} PRIVATE IEBehaviorSharedTests) - -add_test(NAME ${TARGET_NAME} - COMMAND ${TARGET_NAME}) - -add_dependencies(${TARGET_NAME} ${DEPENDENCIES}) \ No newline at end of file diff --git a/inference-engine/tests_deprecated/behavior/cldnn/dummy.cpp b/inference-engine/tests_deprecated/behavior/cldnn/dummy.cpp deleted file mode 100644 index 936eaeb8151..00000000000 --- a/inference-engine/tests_deprecated/behavior/cldnn/dummy.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - diff --git a/inference-engine/tests_deprecated/behavior/gna/CMakeLists.txt b/inference-engine/tests_deprecated/behavior/gna/CMakeLists.txt deleted file mode 100644 index ad1e33fdb40..00000000000 --- a/inference-engine/tests_deprecated/behavior/gna/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set(TARGET_NAME GnaBehaviorTests) - -file(GLOB_RECURSE TEST_INCLUDE - ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) - -file(GLOB_RECURSE TEST_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -list(APPEND DEPENDENCIES - GNAPlugin) - -source_group("src" FILES ${TEST_SRC}) -source_group("include" FILES ${TEST_INCLUDE}) - -add_executable(${TARGET_NAME} - ${TEST_SRC} - ${TEST_INCLUDE}) - -target_link_libraries(${TARGET_NAME} PRIVATE IEBehaviorSharedTests) - -add_test(NAME ${TARGET_NAME} - COMMAND ${TARGET_NAME}) - -add_dependencies(${TARGET_NAME} ${DEPENDENCIES}) \ No newline at end of file diff --git a/inference-engine/tests_deprecated/behavior/gna/dummy.cpp b/inference-engine/tests_deprecated/behavior/gna/dummy.cpp deleted file mode 100644 index 936eaeb8151..00000000000 --- a/inference-engine/tests_deprecated/behavior/gna/dummy.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - diff --git a/inference-engine/tests_deprecated/behavior/mkldnn/CMakeLists.txt b/inference-engine/tests_deprecated/behavior/mkldnn/CMakeLists.txt deleted file mode 100644 index 1127d7c5458..00000000000 --- a/inference-engine/tests_deprecated/behavior/mkldnn/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set(TARGET_NAME MklDnnBehaviorTests) - -file(GLOB_RECURSE TEST_INCLUDE - ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) - -file(GLOB_RECURSE TEST_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -list(APPEND DEPENDENCIES - MKLDNNPlugin) - -source_group("src" FILES ${TEST_SRC}) -source_group("include" FILES ${TEST_INCLUDE}) - -add_executable(${TARGET_NAME} - ${TEST_SRC} - ${TEST_INCLUDE}) - -target_link_libraries(${TARGET_NAME} PRIVATE IEBehaviorSharedTests) - -target_include_directories(${TARGET_NAME} PRIVATE $ ${IE_MAIN_SOURCE_DIR}/samples) - -add_test(NAME ${TARGET_NAME} - COMMAND ${TARGET_NAME}) - -add_dependencies(${TARGET_NAME} ${DEPENDENCIES}) \ No newline at end of file diff --git 
a/inference-engine/tests_deprecated/behavior/mkldnn/dummy.cpp b/inference-engine/tests_deprecated/behavior/mkldnn/dummy.cpp deleted file mode 100644 index 936eaeb8151..00000000000 --- a/inference-engine/tests_deprecated/behavior/mkldnn/dummy.cpp +++ /dev/null @@ -1,4 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - From e3ea9bf4dddfdc29f4c1952ba3deda5611cf3166 Mon Sep 17 00:00:00 2001 From: Yegor Kruglov Date: Thu, 6 May 2021 13:33:51 +0300 Subject: [PATCH 58/73] fix incorrect input names for mean values (#5508) --- model-optimizer/extensions/middle/AddMeanScaleValues.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/model-optimizer/extensions/middle/AddMeanScaleValues.py b/model-optimizer/extensions/middle/AddMeanScaleValues.py index a9585bd4ebd..ac272451b26 100644 --- a/model-optimizer/extensions/middle/AddMeanScaleValues.py +++ b/model-optimizer/extensions/middle/AddMeanScaleValues.py @@ -7,10 +7,11 @@ import numpy as np from extensions.ops.elementwise import Add, Mul from mo.front.common.layout import get_features_dim -from mo.front.extractor import split_node_in_port, get_node_id_with_ports +from mo.front.extractor import get_node_id_with_ports from mo.front.tf.graph_utils import create_op_with_const_inputs from mo.graph.graph import Graph, Node from mo.middle.replacement import MiddleReplacementPattern +from mo.utils.cli_parser import get_node_name_with_port_from_input_value from mo.utils.error import Error from mo.utils.utils import refer_to_faq_msg @@ -91,6 +92,7 @@ class AddMeanScaleValues(MiddleReplacementPattern): for node_name, node_mean_scale_values in values.items(): node_id = None + node_name = get_node_name_with_port_from_input_value(node_name) try: node_id, direction, port = get_node_id_with_ports(graph, node_name, skip_if_no_port=False) assert direction != 'out', 'Only input port can be specified for mean/scale application' From abb1ca657e90e367280bae0abb2192f5888bd8e7 Mon Sep 17 00:00:00 2001 From: Yegor Kruglov Date: Thu, 6 May 2021 13:41:49 +0300 Subject: [PATCH 59/73] Implementation of ArgMin ONNX + TF extractors (#5126) * implement argmin extractors * reconsidering argmax to topk * arg ops refactoring * rename ArgMaxToTopK * added unittests * update docs * move unittest file to new folder * conversations resolving * revert changes with argmax.py, move argmin op to a new file * rename ArgMaxSqueeze * updated BOM file * little fix * code refactoring in ArgMaxOp, updated unittests Co-authored-by: yegor.kruglov --- .../Supported_Frameworks_Layers.md | 2 + model-optimizer/automation/package_BOM.txt | 7 +- .../{ArgMaxSqueeze.py => ArgOpsSqueeze.py} | 15 +- .../extensions/front/onnx/argmin_ext.py | 26 +++ .../extensions/front/tf/argmax_ext.py | 2 +- .../extensions/front/tf/argmin_ext.py | 25 +++ .../{ArgMaxToTopK.py => ArgOpsToTopK.py} | 16 +- model-optimizer/extensions/ops/argmax.py | 80 ++++----- model-optimizer/extensions/ops/argmin.py | 30 ++++ .../extensions/front/caffe/argmax_ext_test.py | 4 +- .../extensions/middle/ArgOpsToTopK_test.py | 152 ++++++++++++++++++ .../unit_tests/extensions/ops/argmax_test.py | 59 +++---- 12 files changed, 327 insertions(+), 91 deletions(-) rename model-optimizer/extensions/front/{ArgMaxSqueeze.py => ArgOpsSqueeze.py} (63%) create mode 100644 model-optimizer/extensions/front/onnx/argmin_ext.py create mode 100644 model-optimizer/extensions/front/tf/argmin_ext.py rename model-optimizer/extensions/middle/{ArgMaxToTopK.py => ArgOpsToTopK.py} (85%) create mode 100644 
model-optimizer/extensions/ops/argmin.py create mode 100644 model-optimizer/unit_tests/extensions/middle/ArgOpsToTopK_test.py diff --git a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md index a711292e462..df34f4085e5 100644 --- a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md +++ b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md @@ -128,6 +128,7 @@ Standard TensorFlow\* operations: | AddV2 | No | | AddN | No | | ArgMax | No | +| ArgMin | No | | Asinh | No | | Atanh | No | | AvgPool | No | @@ -398,6 +399,7 @@ Standard ONNX\* operators: | Add | No | | Affine | No | | ArgMax | No | +| ArgMin | No | | Asin | No | | Asinh | No | | Atan | No | diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index 3e6f79e9f10..2360008b7d8 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -63,7 +63,7 @@ extensions/back/TopKNormalizer.py extensions/back/TransposeReduceFusing.py extensions/back/UselessConcatRemoval.py extensions/front/__init__.py -extensions/front/ArgMaxSqueeze.py +extensions/front/ArgOpsSqueeze.py extensions/front/ATenToEmbeddingBag.py extensions/front/AttributedClampNormalizer.py extensions/front/AttributedGatherNormalizer.py @@ -255,6 +255,7 @@ extensions/front/onnx/__init__.py extensions/front/onnx/activation_ext.py extensions/front/onnx/affine_ext.py extensions/front/onnx/argmax_ext.py +extensions/front/onnx/argmin_ext.py extensions/front/onnx/aten_ext.py extensions/front/onnx/AttributedSliceToSlice.py extensions/front/onnx/cast_ext.py @@ -368,6 +369,7 @@ extensions/front/Swish_fusion.py extensions/front/tf/__init__.py extensions/front/tf/activation_ext.py extensions/front/tf/argmax_ext.py +extensions/front/tf/argmin_ext.py extensions/front/tf/assign_elimination.py extensions/front/tf/automl_efficientdet.json extensions/front/tf/AutomlEfficientDet.py @@ -545,7 +547,7 @@ extensions/middle/AddIsCyclicAttribute.py extensions/middle/AddMeanScaleValues.py extensions/middle/ApplyNHWCtoNCHWpermutation.py extensions/middle/ApplyPermutations.py -extensions/middle/ArgMaxToTopK.py +extensions/middle/ArgOpsToTopK.py extensions/middle/AttributedTileNormalizer.py extensions/middle/BiasAddBroadcasting.py extensions/middle/BinarizeWeightsM1P1.py @@ -641,6 +643,7 @@ extensions/ops/accum.py extensions/ops/activation_ops.py extensions/ops/adaptive_avg_pooling.py extensions/ops/argmax.py +extensions/ops/argmin.py extensions/ops/assert_op.py extensions/ops/aten.py extensions/ops/axpy.py diff --git a/model-optimizer/extensions/front/ArgMaxSqueeze.py b/model-optimizer/extensions/front/ArgOpsSqueeze.py similarity index 63% rename from model-optimizer/extensions/front/ArgMaxSqueeze.py rename to model-optimizer/extensions/front/ArgOpsSqueeze.py index da8b7283027..943e237c4dd 100644 --- a/model-optimizer/extensions/front/ArgMaxSqueeze.py +++ b/model-optimizer/extensions/front/ArgOpsSqueeze.py @@ -7,22 +7,21 @@ from mo.ops.const import Const from mo.ops.squeeze import Squeeze -class ArgMaxSqueeze(FrontReplacementSubgraph): +class ArgOpsSqueeze(FrontReplacementSubgraph): """ - In some frameworks ArgMax operation has keepdims attribute that indicates whether to stay a dimension along - which maximum is computed or not. In case of keepdims=0 this dimension should be removed but ArgMax operation in - IR format is not designed to cover this case. So we should additionally add Squeeze operation right after ArgMax - for this case. 
+ In some frameworks ArgMax/ArgMin operation has keepdims attribute that indicates whether to stay a dimension + along which maximum is computed or not. In case of keepdims=0 this dimension should be removed but ArgMax/ArgMin + operation in IR format is not designed to cover this case. So we should additionally add Squeeze operation right + after ArgMax/ArgMin for this case. """ - op = "ArgMax" enabled = True def pattern(self): - return dict(nodes=[('argmax', dict(op='ArgMax', keepdims=0))], + return dict(nodes=[('node', dict(op=lambda x: x in ['ArgMax', 'ArgMin'], keepdims=0))], edges=[]) def replace_sub_graph(self, graph: Graph, match: dict): - node = match['argmax'] + node = match['node'] connected_ports = [port for port in node.in_ports().values() if not port.disconnected()] squeeze_node = Squeeze(graph, dict()).create_node([], dict(name=node.name + '/Squeeze')) diff --git a/model-optimizer/extensions/front/onnx/argmin_ext.py b/model-optimizer/extensions/front/onnx/argmin_ext.py new file mode 100644 index 00000000000..7337bbb0de7 --- /dev/null +++ b/model-optimizer/extensions/front/onnx/argmin_ext.py @@ -0,0 +1,26 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from extensions.ops.argmin import ArgMinOp +from mo.front.extractor import FrontExtractorOp +from mo.front.onnx.extractors.utils import onnx_attr + + +class ArgMinFrontExtractor(FrontExtractorOp): + op = 'ArgMin' + enabled = True + + @classmethod + def extract(cls, node): + keepdims = onnx_attr(node, 'keepdims', 'i', default=1) + axis = onnx_attr(node, 'axis', 'i', default=0) + + attrs = { + 'axis': axis, + 'top_k': 1, + 'keepdims': keepdims, + 'remove_values_output': True + } + + ArgMinOp.update_node_stat(node, attrs) + return cls.enabled diff --git a/model-optimizer/extensions/front/tf/argmax_ext.py b/model-optimizer/extensions/front/tf/argmax_ext.py index cb308b74c2f..56144d05493 100644 --- a/model-optimizer/extensions/front/tf/argmax_ext.py +++ b/model-optimizer/extensions/front/tf/argmax_ext.py @@ -16,6 +16,6 @@ class ArgMaxFrontExtractor(FrontExtractorOp): def extract(cls, node): ArgMaxOp.update_node_stat(node, {'out_max_val': 0, 'top_k': 1, 'axis': None, 'dim_attrs': ['axis'], 'keepdims': 0, 'remove_values_output': True, - 'output_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int64), + 'output_type': tf_dtype_extractor(node.pb.attr['output_type'].type, np.int64), }) return cls.enabled diff --git a/model-optimizer/extensions/front/tf/argmin_ext.py b/model-optimizer/extensions/front/tf/argmin_ext.py new file mode 100644 index 00000000000..090383c77f7 --- /dev/null +++ b/model-optimizer/extensions/front/tf/argmin_ext.py @@ -0,0 +1,25 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np + +from extensions.ops.argmin import ArgMinOp +from mo.front.extractor import FrontExtractorOp +from mo.front.tf.extractors.utils import tf_dtype_extractor + + +class ArgMinFrontExtractor(FrontExtractorOp): + op = 'ArgMin' + enabled = True + + @classmethod + def extract(cls, node): + attrs = { + 'top_k': 1, + 'axis': None, + 'keepdims': 0, + 'remove_values_output': True, + 'output_type': tf_dtype_extractor(node.pb.attr['output_type'].type, np.int64) + } + ArgMinOp.update_node_stat(node, attrs) + return cls.enabled diff --git a/model-optimizer/extensions/middle/ArgMaxToTopK.py b/model-optimizer/extensions/middle/ArgOpsToTopK.py similarity index 85% rename from model-optimizer/extensions/middle/ArgMaxToTopK.py rename to 
model-optimizer/extensions/middle/ArgOpsToTopK.py index 2a3b9dd73cd..8f5f15f2044 100644 --- a/model-optimizer/extensions/middle/ArgMaxToTopK.py +++ b/model-optimizer/extensions/middle/ArgOpsToTopK.py @@ -8,24 +8,24 @@ from mo.ops.concat import Concat from mo.ops.const import Const -class ArgMaxToTopK(MiddleReplacementPattern): +class ArgOpsToTopK(MiddleReplacementPattern): """ - The transformation replaces ArgMax with the TopK layer. + The transformation replaces ArgMax/ArgMin with the TopK layer. """ - op = "ArgMax" + enabled = True force_clean_up = True def pattern(self): return dict( nodes=[ - ('argmax', dict(op='ArgMax')), + ('node', dict(op=lambda x: x in ['ArgMax', 'ArgMin'])), ], edges=[] ) def replace_pattern(self, graph: Graph, match: dict): - node = match['argmax'] + node = match['node'] node_name = node.soft_get('name', node.id) connected_ports = [port for port in node.in_ports().values() if not port.disconnected()] @@ -36,7 +36,9 @@ class ArgMaxToTopK(MiddleReplacementPattern): assert axis is not None, 'The "axis" should be defined for node "{}"'.format(node_name) assert node.has_and_set('output_type'), 'The data type is not set for node "{}"'.format(node_name) - topk_node = TopK(graph, {'axis': axis, 'mode': 'max', 'sort': 'index', + + topk_mode = 'max' if node.op == 'ArgMax' else 'min' + topk_node = TopK(graph, {'axis': axis, 'mode': topk_mode, 'sort': 'index', 'remove_values_output': node.has_and_set('remove_values_output'), 'index_element_type': node.output_type}).create_node() node.in_port(0).get_connection().set_destination(topk_node.in_port(0)) @@ -47,7 +49,7 @@ class ArgMaxToTopK(MiddleReplacementPattern): topk_node.out_port(0).connect(concat_node.in_port(1)) # indices topk_node.out_port(1).connect(concat_node.in_port(0)) # values if not node.out_port(0).disconnected(): - node.out_port(0).get_connection().set_source(concat_node.out_port(1)) + node.out_port(0).get_connection().set_source(concat_node.out_port(0)) else: if not node.out_port(0).disconnected(): node.out_port(0).get_connection().set_source(topk_node.out_port(1)) diff --git a/model-optimizer/extensions/ops/argmax.py b/model-optimizer/extensions/ops/argmax.py index be6f4ecac62..d3b5a76adaf 100644 --- a/model-optimizer/extensions/ops/argmax.py +++ b/model-optimizer/extensions/ops/argmax.py @@ -6,18 +6,57 @@ import logging as log import numpy as np from mo.front.caffe.extractors.utils import get_canonical_axis_index +from mo.front.common.partial_infer.utils import int64_array from mo.graph.graph import Node, Graph from mo.ops.op import Op, PermuteAttrs +def arg_ops_infer(node: Node): + shape = node.in_port(0).data.get_shape() + node_name = node.soft_get('name', node.id) + assert shape is not None, "Input shape for the node {} is None".format(node_name) + + # there are two inputs in TensorFlow. 
The second input is the axis for ArgMax + connected_in_ports = [port for port in node.in_ports().values() if not port.disconnected()] + if len(connected_in_ports) == 2: + axis = node.in_port(1).data.get_value() + if axis is None: + log.debug('The second argument to {} is None'.format(node.soft_get('name', node.id))) + return + node.axis = axis + # remove the unnecessary input + node.in_port(1).disconnect() + + num_top_axes = shape.size + if num_top_axes < 3: + num_top_axes = 3 + + out_shape = np.ones(num_top_axes, dtype=np.int64) + + if node.has_valid('axis'): + axis = get_canonical_axis_index(shape, node.axis) + node.axis = axis + out_shape = int64_array(shape) + out_shape[axis] = node.top_k + PermuteAttrs.create_permute_attrs(node, attrs=[('axis', 'input:0')]) + else: + out_shape[0] = shape[0] + out_shape[2] = node.top_k + if node.has_and_set('out_max_val'): + out_shape[1] = 2 + + node.out_port(0).data.set_shape(out_shape) + + class ArgMaxOp(Op): op = 'ArgMax' + enabled = False def __init__(self, graph: Graph, attrs: dict): mandatory_props = { - 'type': __class__.op, - 'op': __class__.op, - 'infer': ArgMaxOp.argmax_infer, + 'type': None, + 'op': self.op, + 'infer': arg_ops_infer, 'output_type': np.int64, 'in_ports_count': 2, 'out_ports_count': 1, @@ -30,38 +69,3 @@ class ArgMaxOp(Op): 'top_k', 'axis', ] - - @staticmethod - def argmax_infer(node: Node): - shape = node.in_node(0).shape - if shape is None: - return - - # there are two inputs in TensorFlow. The second input is the axis for ArgMax - if len(node.in_nodes()) == 2: - if node.in_node(1).value is None: - log.debug('The second argument to ArgMax is None') - return - node.axis = node.in_node(1).value.item() - # remove the unnecessary input - node.graph.remove_edge(node.in_node(1).id, node.id) - - num_top_axes = shape.size - if num_top_axes < 3: - num_top_axes = 3 - - out_shape = np.ones(num_top_axes, dtype=int) - - if node.has_valid('axis'): - axis = get_canonical_axis_index(shape, node.axis) - node.axis = axis - out_shape = np.array(shape) - out_shape[axis] = node.top_k - PermuteAttrs.create_permute_attrs(node, attrs=[('axis', 'input:0')]) - else: - out_shape[0] = shape[0] - out_shape[2] = node.top_k - if node.out_max_val: - out_shape[1] = 2 - - node.out_node().shape = out_shape diff --git a/model-optimizer/extensions/ops/argmin.py b/model-optimizer/extensions/ops/argmin.py new file mode 100644 index 00000000000..3f3e9073aff --- /dev/null +++ b/model-optimizer/extensions/ops/argmin.py @@ -0,0 +1,30 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np + +from extensions.ops.argmax import arg_ops_infer +from mo.graph.graph import Graph +from mo.ops.op import Op + + +class ArgMinOp(Op): + op = 'ArgMin' + enabled = False + + def __init__(self, graph: Graph, attrs: dict): + mandatory_props = { + 'type': None, + 'op': self.op, + 'infer': arg_ops_infer, + 'output_type': np.int64, + 'in_ports_count': 2, + 'out_ports_count': 1, + } + super().__init__(graph, mandatory_props, attrs) + + def supported_attrs(self): + return [ + 'top_k', + 'axis', + ] diff --git a/model-optimizer/unit_tests/extensions/front/caffe/argmax_ext_test.py b/model-optimizer/unit_tests/extensions/front/caffe/argmax_ext_test.py index e48572d84e4..9519ba7148b 100644 --- a/model-optimizer/unit_tests/extensions/front/caffe/argmax_ext_test.py +++ b/model-optimizer/unit_tests/extensions/front/caffe/argmax_ext_test.py @@ -5,7 +5,7 @@ import unittest from unittest.mock import patch from extensions.front.caffe.argmax_ext 
import ArgMaxFrontExtractor -from extensions.ops.argmax import ArgMaxOp +from extensions.ops.argmax import ArgMaxOp, arg_ops_infer from mo.ops.op import Op from unit_tests.utils.extractors import FakeMultiParam from unit_tests.utils.graph import FakeNode @@ -44,7 +44,7 @@ class TestArgMaxExt(unittest.TestCase): 'out_max_val': True, 'top_k': 100, 'axis': 2, - 'infer': ArgMaxOp.argmax_infer, + 'infer': arg_ops_infer, 'remove_values_output': True, } diff --git a/model-optimizer/unit_tests/extensions/middle/ArgOpsToTopK_test.py b/model-optimizer/unit_tests/extensions/middle/ArgOpsToTopK_test.py new file mode 100644 index 00000000000..512903f4c3f --- /dev/null +++ b/model-optimizer/unit_tests/extensions/middle/ArgOpsToTopK_test.py @@ -0,0 +1,152 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +import numpy as np + +from extensions.middle.ArgOpsToTopK import ArgOpsToTopK +from mo.front.common.partial_infer.utils import int64_array +from mo.utils.ir_engine.compare_graphs import compare_graphs +from unit_tests.utils.graph import regular_op_with_empty_data, result, build_graph, connect, \ + valued_const_with_data, regular_op, empty_data, connect_front + +nodes_attributes = { + **regular_op_with_empty_data('input', {'op': 'Parameter', 'type': 'Parameter'}), + **regular_op_with_empty_data('argmax', {'op': 'ArgMax', 'type': None, 'out_max_val': 0, 'top_k': 1, 'axis': 0, + 'output_type': np.int32, 'remove_values_output': True}), + **regular_op_with_empty_data('argmin', {'op': 'ArgMin', 'type': None, 'top_k': 1, 'axis': 0, + 'output_type': np.int32, 'remove_values_output': True}), + **result('result'), + **valued_const_with_data('axis_const', int64_array([1])), + + **regular_op('topk', {'op': 'TopK', 'type': 'TopK', 'sort': 'index', 'index_element_type': np.int32}), + **empty_data('topk_out_0_data'), + **empty_data('topk_out_1_data'), + **regular_op_with_empty_data('topk_scalar', {'op': 'Const', 'type': 'Const', 'value': int64_array([1]), + 'shape': []}), + + + **regular_op_with_empty_data('concat', {'op': 'Concat', 'type': 'Concat', 'axis': 1}) +} + + +class ArgOpsToTopKTest(unittest.TestCase): + + def test_tf_argmax_to_topk(self): + graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', '0:argmax'), + *connect('axis_const', '1:argmax'), + *connect('argmax', 'result') + ], + nodes_with_edges_only=True) + ArgOpsToTopK().find_and_replace_pattern(graph) + ref_graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', '0:topk'), + *connect('topk_scalar', '1:topk'), + *connect_front('topk:1', 'topk_out_1_data'), + *connect_front('topk_out_1_data', 'result'), + ], + update_attributes={ + 'topk': {'axis': int64_array([1]), 'mode': 'max', 'remove_values_output': True}, + }, + nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, ref_graph, 'input', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_tf_argmin_to_topk(self): + graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', '0:argmin'), + *connect('axis_const', '1:argmin'), + *connect('argmin', 'result') + ], + nodes_with_edges_only=True) + ArgOpsToTopK().find_and_replace_pattern(graph) + ref_graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', '0:topk'), + *connect('topk_scalar', '1:topk'), + *connect_front('topk:1', 'topk_out_1_data'), + *connect_front('topk_out_1_data', 'result') + ], + update_attributes={ + 'topk': {'axis': int64_array([1]), 'mode': 'min', 
'remove_values_output': True}, + }, + nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, ref_graph, 'input', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_onnx_argmax_to_topk(self): + graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', 'argmax'), + *connect('argmax', 'result') + ], + nodes_with_edges_only=True) + ArgOpsToTopK().find_and_replace_pattern(graph) + ref_graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', '0:topk'), + *connect('topk_scalar', '1:topk'), + *connect_front('topk:1', 'topk_out_1_data'), + *connect_front('topk_out_1_data', 'result') + ], + update_attributes={ + 'topk': {'axis': 0, 'mode': 'max', 'remove_values_output': True}, + }, + nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, ref_graph, 'input', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_onnx_argmin_to_topk(self): + graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', 'argmin'), + *connect('argmin', 'result') + ], + nodes_with_edges_only=True) + ArgOpsToTopK().find_and_replace_pattern(graph) + ref_graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', '0:topk'), + *connect('topk_scalar', '1:topk'), + *connect_front('topk:1', 'topk_out_1_data'), + *connect_front('topk_out_1_data', 'result') + ], + update_attributes={ + 'topk': {'axis': 0, 'mode': 'min', 'remove_values_output': True}, + }, + nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, ref_graph, 'input', check_op_attrs=True) + self.assertTrue(flag, resp) + + def test_caffe_argmax_to_topk(self): + graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', 'argmax'), + *connect('argmax', 'result') + ], + update_attributes={ + 'argmax': {'out_max_val': 1} + }, + nodes_with_edges_only=True) + ArgOpsToTopK().find_and_replace_pattern(graph) + ref_graph = build_graph(nodes_attrs=nodes_attributes, + edges=[ + *connect('input', '0:topk'), + *connect('topk_scalar', '1:topk'), + *connect_front('topk:0','topk_out_0_data'), + *connect_front('topk:1', 'topk_out_1_data'), + *connect_front('topk_out_0_data', '1:concat'), + *connect_front('topk_out_1_data', '0:concat'), + *connect('concat', 'result') + ], + update_attributes={ + 'topk': {'axis': 0, 'mode': 'max', 'remove_values_output': True}, + }, + nodes_with_edges_only=True) + (flag, resp) = compare_graphs(graph, ref_graph, 'input', check_op_attrs=True) + self.assertTrue(flag, resp) \ No newline at end of file diff --git a/model-optimizer/unit_tests/extensions/ops/argmax_test.py b/model-optimizer/unit_tests/extensions/ops/argmax_test.py index 13361760f01..1b7e3858d73 100644 --- a/model-optimizer/unit_tests/extensions/ops/argmax_test.py +++ b/model-optimizer/unit_tests/extensions/ops/argmax_test.py @@ -5,21 +5,25 @@ import unittest import numpy as np -from extensions.ops.argmax import ArgMaxOp +from extensions.ops.argmax import arg_ops_infer from mo.graph.graph import Node from unit_tests.utils.graph import build_graph -nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'}, +nodes_attributes = { + 'op_input': {'kind': 'op', 'op': 'Parameter'}, + 'node_1': {'kind': 'data'}, 'argmax': {'op': 'ArgMax', 'kind': 'op'}, - 'node_3': {'type': 'Identity', 'kind': 'op'}, - 'op_output': { 'kind': 'op', 'op': 'Result'} + 'node_3': {'kind': 'data', 'value': None}, + 'op_output': {'kind': 'op', 'op': 'Result'} } class TestArgMaxOp(unittest.TestCase): def test_caffe_argmax_axis(self): graph = 
build_graph(nodes_attributes, - [('node_1', 'argmax'), + [ + ('op_input', 'node_1'), + ('node_1', 'argmax'), ('argmax', 'node_3'), ('node_3', 'op_output') ], @@ -33,7 +37,7 @@ class TestArgMaxOp(unittest.TestCase): }) argmax_node = Node(graph, 'argmax') - ArgMaxOp.argmax_infer(argmax_node) + arg_ops_infer(argmax_node) exp_shape = np.array([1, 3, 100, 2049]) res_shape = graph.node['node_3']['shape'] for i in range(0, len(exp_shape)): @@ -41,7 +45,9 @@ class TestArgMaxOp(unittest.TestCase): def test_caffe_argmax_axis_negative(self): graph = build_graph(nodes_attributes, - [('node_1', 'argmax'), + [ + ('op_input', 'node_1'), + ('node_1', 'argmax'), ('argmax', 'node_3'), ('node_3', 'op_output') ], @@ -55,7 +61,7 @@ class TestArgMaxOp(unittest.TestCase): }) argmax_node = Node(graph, 'argmax') - ArgMaxOp.argmax_infer(argmax_node) + arg_ops_infer(argmax_node) exp_shape = np.array([1, 3, 1025, 100]) res_shape = graph.node['node_3']['shape'] self.assertEqual(argmax_node.axis, 3) @@ -64,7 +70,9 @@ class TestArgMaxOp(unittest.TestCase): def test_caffe_argmax_no_axis(self): graph = build_graph(nodes_attributes, - [('node_1', 'argmax'), + [ + ('op_input', 'node_1'), + ('node_1', 'argmax'), ('argmax', 'node_3'), ('node_3', 'op_output') ], @@ -77,7 +85,7 @@ class TestArgMaxOp(unittest.TestCase): }) argmax_node = Node(graph, 'argmax') - ArgMaxOp.argmax_infer(argmax_node) + arg_ops_infer(argmax_node) exp_shape = np.array([1, 2, 100, 1]) res_shape = graph.node['node_3']['shape'] for i in range(0, len(exp_shape)): @@ -85,7 +93,9 @@ class TestArgMaxOp(unittest.TestCase): def test_caffe_argmax_extend_shape(self): graph = build_graph(nodes_attributes, - [('node_1', 'argmax'), + [ + ('op_input', 'node_1'), + ('node_1', 'argmax'), ('argmax', 'node_3'), ('node_3', 'op_output') ], @@ -98,7 +108,7 @@ class TestArgMaxOp(unittest.TestCase): }) argmax_node = Node(graph, 'argmax') - ArgMaxOp.argmax_infer(argmax_node) + arg_ops_infer(argmax_node) exp_shape = np.array([1, 2, 100]) res_shape = graph.node['node_3']['shape'] for i in range(0, len(exp_shape)): @@ -106,7 +116,9 @@ class TestArgMaxOp(unittest.TestCase): def test_caffe_argmax_out_max_val_false(self): graph = build_graph(nodes_attributes, - [('node_1', 'argmax'), + [ + ('op_input', 'node_1'), + ('node_1', 'argmax'), ('argmax', 'node_3'), ('node_3', 'op_output') ], @@ -119,27 +131,8 @@ class TestArgMaxOp(unittest.TestCase): }) argmax_node = Node(graph, 'argmax') - ArgMaxOp.argmax_infer(argmax_node) + arg_ops_infer(argmax_node) exp_shape = np.array([1, 1, 100]) res_shape = graph.node['node_3']['shape'] for i in range(0, len(exp_shape)): self.assertEqual(exp_shape[i], res_shape[i]) - - def test_caffe_argmax_no_shape(self): - graph = build_graph(nodes_attributes, - [('node_1', 'argmax'), - ('argmax', 'node_3'), - ('node_3', 'op_output') - ], - {'node_3': {'shape': None}, - 'node_1': {'shape': None}, - 'argmax': { - 'out_max_val': False, - 'top_k': 100 - } - }) - - argmax_node = Node(graph, 'argmax') - ArgMaxOp.argmax_infer(argmax_node) - res_shape = graph.node['node_3']['shape'] - self.assertIsNone(res_shape) From 5bd6343ce5f7d2f92b56c7c95426915cfdbf5677 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Thu, 6 May 2021 14:44:01 +0300 Subject: [PATCH 60/73] Extend filling roi data for other precisions (#5432) by making it template. 
--- .../src/read_ir/generate_inputs.cpp | 13 ++++++------ .../src/single_layer/roi_pooling.cpp | 4 ++-- .../common_test_utils/data_utils.hpp | 21 +++++++++++-------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/inference-engine/tests/functional/shared_test_classes/src/read_ir/generate_inputs.cpp b/inference-engine/tests/functional/shared_test_classes/src/read_ir/generate_inputs.cpp index c4fc81dd7b7..0d52ca6dedc 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/read_ir/generate_inputs.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/read_ir/generate_inputs.cpp @@ -273,13 +273,12 @@ InferenceEngine::Blob::Ptr generate(const std::shared_ptrallocate(); - CommonTestUtils::fill_data_roi(blob->buffer(), - blob->size(), - node->get_input_shape(0).front() - 1, - inputShape[2], - inputShape[3], - 1.0f, - node->get_method() == "max"); + CommonTestUtils::fill_data_roi(blob, + node->get_input_shape(0).front() - 1, + inputShape[2], + inputShape[3], + 1.0f, + node->get_method() == "max"); return blob; } return FuncTestUtils::createAndFillBlob(info.getTensorDesc()); diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/roi_pooling.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/roi_pooling.cpp index f8d3ba85693..c9a6ed3d135 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/roi_pooling.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/roi_pooling.cpp @@ -51,8 +51,8 @@ namespace LayerTestsDefinitions { if (it == 1) { blob = make_blob_with_precision(info->getTensorDesc()); blob->allocate(); - CommonTestUtils::fill_data_roi(blob->buffer(), blob->size(), feat_map_shape[0] - 1, - height, width, 1.0f, is_roi_max_mode); + CommonTestUtils::fill_data_roi(blob, feat_map_shape[0] - 1, + height, width, 1.0f, is_roi_max_mode); } else { blob = GenerateInput(*info); } diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp index 50e2eb37e99..4a41693acd0 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp @@ -139,9 +139,12 @@ inline void fill_data_bbox(float *data, size_t size, int height, int width, floa } } +template inline void -fill_data_roi(float *data, size_t size, const uint32_t range, const int height, const int width, const float omega, +fill_data_roi(InferenceEngine::Blob::Ptr &blob, const uint32_t range, const int height, const int width, const float omega, const bool is_roi_max_mode, const int seed = 1) { + using dataType = typename InferenceEngine::PrecisionTrait::value_type; + auto *data = blob->buffer().as(); std::default_random_engine random(seed); std::uniform_int_distribution distribution(0, range); @@ -151,31 +154,31 @@ fill_data_roi(float *data, size_t size, const uint32_t range, const int height, float center_h = (max_y) / 2.0f; float center_w = (max_x) / 2.0f; - for (size_t i = 0; i < size; i += 5) { - data[i] = static_cast(distribution(random)); + for (size_t i = 0; i < blob->size(); i += 5) { + data[i] = static_cast(distribution(random)); const float x0 = (center_w + width * 0.3f * sin(static_cast(i + 1) * omega)); const float x1 = (center_w + width * 0.3f * sin(static_cast(i + 3) * omega)); - data[i + 1] = is_roi_max_mode ? std::floor(x0) : x0; - data[i + 3] = is_roi_max_mode ? 
std::floor(x1) : x1; + data[i + 1] = static_cast(is_roi_max_mode ? std::floor(x0) : x0); + data[i + 3] = static_cast(is_roi_max_mode ? std::floor(x1) : x1); if (data[i + 3] < data[i + 1]) { std::swap(data[i + 1], data[i + 3]); } if (data[i + 1] < 0) data[i + 1] = 0; if (data[i + 3] > max_x) - data[i + 3] = static_cast(max_x); + data[i + 3] = static_cast(max_x); const float y0 = (center_h + height * 0.3f * sin(static_cast(i + 2) * omega)); const float y1 = (center_h + height * 0.3f * sin(static_cast(i + 4) * omega)); - data[i + 2] = is_roi_max_mode ? std::floor(y0) : y0; - data[i + 4] = is_roi_max_mode ? std::floor(y1) : y1; + data[i + 2] = static_cast(is_roi_max_mode ? std::floor(y0) : y0); + data[i + 4] = static_cast(is_roi_max_mode ? std::floor(y1) : y1); if (data[i + 4] < data[i + 2]) { std::swap(data[i + 2], data[i + 4]); } if (data[i + 2] < 0) data[i + 2] = 0; if (data[i + 4] > max_y) - data[i + 4] = static_cast(max_y); + data[i + 4] = static_cast(max_y); } } From a411af166800523890fe58a6f7d6837f9d20d330 Mon Sep 17 00:00:00 2001 From: Polina Brzezinskaya Date: Thu, 6 May 2021 14:47:10 +0300 Subject: [PATCH 61/73] [IE][VPU]: Fix for crash in Myriad plugin during LoadNetwork with Hetero plugin (#5222) --- .../src/vpu/common/src/ngraph/query_network.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/inference-engine/src/vpu/common/src/ngraph/query_network.cpp b/inference-engine/src/vpu/common/src/ngraph/query_network.cpp index b2185db2846..a66331a9ebc 100644 --- a/inference-engine/src/vpu/common/src/ngraph/query_network.cpp +++ b/inference-engine/src/vpu/common/src/ngraph/query_network.cpp @@ -66,13 +66,11 @@ InferenceEngine::QueryNetworkResult getQueryNetwork(const InferenceEngine::CNNNe } } - for (const auto& layerName : supported) { + for (const auto& layerName : unsupported) { if (supported.empty()) { break; } - if (InferenceEngine::details::contains(unsupported, layerName)) { - supported.erase(layerName); - } + supported.erase(layerName); } unsupported.clear(); From 4790c79eb47eb2191d36daf1587deb270467e9b8 Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov <32412802+just-sparta@users.noreply.github.com> Date: Thu, 6 May 2021 14:57:50 +0300 Subject: [PATCH 62/73] Add time_tests dir path to sys.path (#5498) --- tests/time_tests/README.md | 1 - tests/time_tests/scripts/run_timetest.py | 3 +++ tests/time_tests/test_runner/conftest.py | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/time_tests/README.md b/tests/time_tests/README.md index 282f0d8394c..d542aff7925 100644 --- a/tests/time_tests/README.md +++ b/tests/time_tests/README.md @@ -34,6 +34,5 @@ cmake .. -DInferenceEngineDeveloperPackage_DIR=$(realpath ../../../build) && mak 2. 
Run several configurations using `pytest`: ``` bash -export PYTHONPATH=./:$PYTHONPATH pytest ./test_runner/test_timetest.py --exe ../../bin/intel64/Release/timetest_infer ``` diff --git a/tests/time_tests/scripts/run_timetest.py b/tests/time_tests/scripts/run_timetest.py index 03ecad9b5b4..e0ff85d07b2 100644 --- a/tests/time_tests/scripts/run_timetest.py +++ b/tests/time_tests/scripts/run_timetest.py @@ -22,6 +22,9 @@ import yaml from pathlib import Path from pprint import pprint +TIME_TESTS_DIR = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(TIME_TESTS_DIR) + from test_runner.utils import filter_timetest_result diff --git a/tests/time_tests/test_runner/conftest.py b/tests/time_tests/test_runner/conftest.py index cd3e8267e3f..d0cb928ce07 100644 --- a/tests/time_tests/test_runner/conftest.py +++ b/tests/time_tests/test_runner/conftest.py @@ -29,6 +29,9 @@ import yaml from pathlib import Path from jsonschema import validate, ValidationError +TIME_TESTS_DIR = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(TIME_TESTS_DIR) + from scripts.run_timetest import check_positive_int from test_runner.utils import upload_timetest_data, metadata_from_manifest, get_os_name, get_os_version, \ get_cpu_info, DATABASE, DB_COLLECTIONS From 5e4cd1127b5ce58cbc8dab89b0dc47a1d1620d70 Mon Sep 17 00:00:00 2001 From: Andrey Zaytsev Date: Thu, 6 May 2021 15:37:13 +0300 Subject: [PATCH 63/73] Integrate UAT fixes (#5517) * Added info on DockerHub CI Framework * Feature/azaytsev/change layout (#3295) * Changes according to feedback comments * Replaced @ref's with html links * Fixed links, added a title page for installing from repos and images, fixed formatting issues * Added links * minor fix * Added DL Streamer to the list of components installed by default * Link fixes * Link fixes * ovms doc fix (#2988) * added OpenVINO Model Server * ovms doc fixes Co-authored-by: Trawinski, Dariusz * Updated openvino_docs.xml * Edits to MO Per findings spreadsheet * macOS changes per issue spreadsheet * Fixes from review spreadsheet Mostly IE_DG fixes * Consistency changes * Make doc fixes from last round of review * integrate changes from baychub/master * Update Intro.md * Update Cutting_Model.md * Update Cutting_Model.md * Fixed link to Customize_Model_Optimizer.md Co-authored-by: Trawinski, Dariusz Co-authored-by: baychub --- docs/HOWTO/Custom_Layers_Guide.md | 72 ++++++------------- docs/IE_DG/Bfloat16Inference.md | 2 +- ...Deep_Learning_Inference_Engine_DevGuide.md | 13 ++-- docs/IE_DG/DynamicBatching.md | 2 +- .../IE_DG/Extensibility_DG/AddingNGraphOps.md | 4 +- .../IE_DG/Extensibility_DG/Custom_ONNX_Ops.md | 2 +- docs/IE_DG/Extensibility_DG/Intro.md | 8 +-- docs/IE_DG/Extensibility_DG/VPU_Kernel.md | 5 +- docs/IE_DG/GPU_Kernels_Tuning.md | 9 ++- docs/IE_DG/InferenceEngine_QueryAPI.md | 2 +- docs/IE_DG/Int8Inference.md | 6 +- ...grate_with_customer_application_new_API.md | 39 +++------- docs/IE_DG/Intro_to_Performance.md | 2 +- docs/IE_DG/Introduction.md | 2 +- docs/IE_DG/Memory_primitives.md | 2 +- docs/IE_DG/ONNX_Support.md | 4 +- docs/IE_DG/ShapeInference.md | 2 +- docs/IE_DG/inference_engine_intro.md | 10 +-- docs/IE_DG/network_state_intro.md | 33 ++++----- docs/IE_DG/supported_plugins/GNA.md | 2 +- .../supported_plugins/GPU_RemoteBlob_API.md | 4 +- docs/IE_DG/supported_plugins/HDDL.md | 12 ++-- docs/IE_DG/supported_plugins/HETERO.md | 19 +++-- docs/IE_DG/supported_plugins/MULTI.md | 36 +++++----- docs/IE_DG/supported_plugins/MYRIAD.md | 2 +- .../supported_plugins/Supported_Devices.md | 20 
+++--- docs/IE_DG/supported_plugins/VPU.md | 2 +- .../Deep_Learning_Model_Optimizer_DevGuide.md | 2 +- docs/MO_DG/IR_and_opsets.md | 6 +- docs/MO_DG/Known_Issues_Limitations.md | 2 +- .../prepare_model/Config_Model_Optimizer.md | 2 +- .../Model_Optimization_Techniques.md | 14 ++-- .../prepare_model/Model_Optimizer_FAQ.md | 6 +- .../prepare_model/Prepare_Trained_Model.md | 2 +- .../convert_model/Convert_Model_From_Caffe.md | 19 ++--- .../convert_model/Convert_Model_From_Kaldi.md | 14 ++-- .../convert_model/Convert_Model_From_MxNet.md | 8 +-- .../convert_model/Convert_Model_From_ONNX.md | 6 +- .../Convert_Model_From_TensorFlow.md | 56 +++++++-------- .../convert_model/Converting_Model.md | 4 +- .../convert_model/Converting_Model_General.md | 31 ++++---- .../convert_model/Cutting_Model.md | 31 ++++---- .../IR_suitable_for_INT8_inference.md | 8 +-- .../kaldi_specific/Aspire_Tdnn_Model.md | 2 +- .../mxnet_specific/Convert_GluonCV_Models.md | 8 +-- .../Convert_Style_Transfer_From_MXNet.md | 19 ++--- .../onnx_specific/Convert_DLRM.md | 2 +- .../onnx_specific/Convert_GPT2.md | 2 +- .../pytorch_specific/Convert_F3Net.md | 2 +- .../pytorch_specific/Convert_YOLACT.md | 2 +- .../Convert_CRNN_From_Tensorflow.md | 8 +-- .../Convert_DeepSpeech_From_Tensorflow.md | 21 +++--- .../Convert_EfficientDet_Models.md | 11 +-- .../Convert_GNMT_From_Tensorflow.md | 4 +- .../Convert_NCF_From_Tensorflow.md | 25 +++---- .../Convert_Object_Detection_API_Models.md | 5 +- .../Convert_WideAndDeep_Family_Models.md | 3 + .../Convert_XLNet_From_Tensorflow.md | 7 +- .../Convert_YOLO_From_Tensorflow.md | 20 +++--- .../Customize_Model_Optimizer.md | 8 +-- ...Net_Model_Optimizer_with_New_Primitives.md | 9 ++- ...odel_Optimizer_with_Caffe_Python_Layers.md | 7 +- ...ing_Model_Optimizer_with_New_Primitives.md | 4 +- .../Legacy_Mode_for_Caffe_Custom_Layers.md | 4 +- .../Subgraph_Replacement_Model_Optimizer.md | 2 +- docs/get_started/get_started_dl_workbench.md | 6 +- docs/get_started/get_started_linux.md | 30 +++++++- docs/get_started/get_started_macos.md | 33 +++++++-- docs/get_started/get_started_windows.md | 24 ++++++- docs/index.md | 28 ++++---- docs/install_guides/PAC_Configure_2019RX.md | 2 +- .../install_guides/deployment-manager-tool.md | 5 +- .../installing-openvino-linux.md | 9 +-- .../installing-openvino-macos.md | 41 ++++++----- .../installing-openvino-raspbian.md | 24 ++++--- .../installing-openvino-windows.md | 8 +-- docs/nGraph_DG/intro.md | 6 +- docs/nGraph_DG/nGraphTransformation.md | 2 +- docs/nGraph_DG/nGraph_basic_concepts.md | 4 +- .../dldt_optimization_guide.md | 2 +- docs/ovsa/ovsa_get_started.md | 2 +- .../object_detection_sample_ssd/README.md | 2 +- .../ie_bridges/python/docs/api_overview.md | 13 ++-- .../sample/hello_classification/README.md | 4 +- .../sample/hello_query_device/README.md | 5 +- .../object_detection_sample_ssd/README.md | 2 +- .../Offline_speech_recognition_demo.md | 6 +- .../Speech_libs_and_demos.md | 10 +-- .../samples/speech_sample/README.md | 29 ++++---- .../fluid/modules/gapi/doc/10-hld-overview.md | 2 +- inference-engine/tools/compile_tool/README.md | 6 +- 91 files changed, 513 insertions(+), 494 deletions(-) diff --git a/docs/HOWTO/Custom_Layers_Guide.md b/docs/HOWTO/Custom_Layers_Guide.md index 1de91356304..13590e5d202 100644 --- a/docs/HOWTO/Custom_Layers_Guide.md +++ b/docs/HOWTO/Custom_Layers_Guide.md @@ -51,65 +51,45 @@ To see the operations that are supported by each device plugin for the Inference ### Custom Operation Support for the Model Optimizer -Model Optimizer model 
conversion pipeline is described in details in "Model Conversion Pipeline" section on the -[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). -It is recommended to read that article first for a better understanding of the following material. +Model Optimizer model conversion pipeline is described in detail in "Model Conversion Pipeline" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). It is best to read that article first for a better understanding of the following material. -Model Optimizer provides extensions mechanism to support new operations and implement custom model transformations to -generate optimized IR. This mechanism is described in the "Model Optimizer Extensions" section on the +Model Optimizer provides an extensions mechanism to support new operations and implement custom model transformations to generate optimized IR. This mechanism is described in the "Model Optimizer Extensions" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). -Two types of the Model Optimizer extensions should be implemented to support custom operation at minimum: -1. Operation class for a new operation. This class stores information about the operation, its attributes, shape -inference function, attributes to be saved to an IR and some others internally used attributes. Refer to the -"Model Optimizer Operation" section on the -[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the -detailed instruction on how to implement it. +Two types of the Model Optimizer extensions should be implemented to support custom operations, at a minimum: +1. Operation class for a new operation. This class stores information about the operation, its attributes, shape inference function, attributes to be saved to an IR and some others internally used attributes. Refer to the "Model Optimizer Operation" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for detailed instructions on how to implement it. 2. Operation attributes extractor. The extractor is responsible for parsing framework-specific representation of the operation and uses corresponding operation class to update graph node attributes with necessary attributes of the -operation. Refer to the "Operation Extractor" section on the -[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the -detailed instruction on how to implement it. +operation. Refer to the "Operation Extractor" section of +[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for detailed instructions on how to implement it. -> **NOTE:** In some cases you may need to implement some transformation to support the operation. This topic is covered -> in the "Graph Transformation Extensions" section on the -> [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). +> **NOTE:** In some cases you may need to implement some transformation to support the operation. This topic is covered in the "Graph Transformation Extensions" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). 
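Editor's note (illustrative, not part of the guide or the patch): the ArgMin support added earlier in this series follows exactly this two-part pattern, so a minimal sketch of a hypothetical custom operation can be modeled on it. The operation name `MyCustomOp` and its `alpha` attribute are placeholders, not real OpenVINO identifiers.

```python
# Hypothetical sketch of the two minimum Model Optimizer extensions, modeled on the
# ArgMin op/extractor added in this patch series. 'MyCustomOp' and 'alpha' are placeholders.
from mo.front.extractor import FrontExtractorOp
from mo.front.onnx.extractors.utils import onnx_attr
from mo.graph.graph import Graph, Node
from mo.ops.op import Op


class MyCustomOp(Op):
    op = 'MyCustomOp'

    def __init__(self, graph: Graph, attrs: dict):
        super().__init__(graph, {
            'type': self.op,        # operation type that is written to the IR
            'op': self.op,
            'infer': self.infer,    # shape inference callback
            'in_ports_count': 1,
            'out_ports_count': 1,
        }, attrs)

    def supported_attrs(self):
        return ['alpha']            # attributes to be serialized to the IR

    @staticmethod
    def infer(node: Node):
        # In this sketch the output simply keeps the input shape.
        node.out_port(0).data.set_shape(node.in_port(0).data.get_shape())


class MyCustomOpFrontExtractor(FrontExtractorOp):
    op = 'MyCustomOp'
    enabled = True

    @classmethod
    def extract(cls, node):
        # Parse the framework-specific attribute (ONNX shown) and update the node.
        MyCustomOp.update_node_stat(node, {'alpha': onnx_attr(node, 'alpha', 'f', default=1.0)})
        return cls.enabled
```

An ONNX extractor is shown because that is what this series adds for ArgMin; a TensorFlow extractor differs only in how the attribute is read from `node.pb`.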
## Custom Operations Extensions for the Inference Engine -Inference Engine provides extensions mechanism to support new operations. This mechanism is described in the -[Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md). +Inference Engine provides extensions mechanism to support new operations. This mechanism is described in [Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md). -Each device plugin includes a library of optimized implementations to execute known operations which must be extended to -execute a custom operation. The custom operation extension is implemented according to the target device: +Each device plugin includes a library of optimized implementations to execute known operations which must be extended to execute a custom operation. The custom operation extension is implemented according to the target device: - Custom Operation CPU Extension - A compiled shared library (`.so` or `.dll`) needed by the CPU Plugin for executing the custom operation on a CPU. Refer to the [How to Implement Custom CPU Operations](../IE_DG/Extensibility_DG/CPU_Kernel.md) for more details. - Custom Operation GPU Extension - - OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the GPU along with a - operation description file (.xml) needed by the GPU Plugin for the custom operation kernel. Refer to the - [How to Implement Custom GPU Operations](../IE_DG/Extensibility_DG/GPU_Kernel.md) for more details. + - OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the GPU along with an operation description file (.xml) needed by the GPU Plugin for the custom operation kernel. Refer to the [How to Implement Custom GPU Operations](../IE_DG/Extensibility_DG/GPU_Kernel.md) for more details. - Custom Operation VPU Extension - - OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the VPU along with a - operation description file (.xml) needed by the VPU Plugin for the custom operation kernel. Refer to the - [How to Implement Custom Operations for VPU](../IE_DG/Extensibility_DG/VPU_Kernel.md) for more details. + - OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the VPU along with an operation description file (.xml) needed by the VPU Plugin for the custom operation kernel. Refer to [How to Implement Custom Operations for VPU](../IE_DG/Extensibility_DG/VPU_Kernel.md) for more details. -Also, it is necessary to implement nGraph custom operation according to the -[Custom nGraph Operation](../IE_DG/Extensibility_DG/AddingNGraphOps.md) so the Inference Engine can read an IR with this -operation and correctly infer output tensors shape and type. +Also, it is necessary to implement nGraph custom operation according to [Custom nGraph Operation](../IE_DG/Extensibility_DG/AddingNGraphOps.md) so the Inference Engine can read an IR with this +operation and correctly infer output tensor shape and type. ## Enabling Magnetic Resonance Image Reconstruction Model -This chapter provides a step-by-step instruction on how to enable the magnetic resonance image reconstruction model -implemented in the [repository](https://github.com/rmsouza01/Hybrid-CS-Model-MRI/) using a custom operation on CPU. The -example is prepared for a model generated from the repository with hash `2ede2f96161ce70dcdc922371fe6b6b254aafcc8`. 
+This chapter provides step-by-step instructions on how to enable the magnetic resonance image reconstruction model implemented in the [repository](https://github.com/rmsouza01/Hybrid-CS-Model-MRI/) using a custom operation on CPU. The example is prepared for a model generated from the repository with hash `2ede2f96161ce70dcdc922371fe6b6b254aafcc8`. ### Download and Convert the Model to a Frozen TensorFlow\* Model Format -The original pre-trained model is provided in the hdf5 format which is not supported by OpenVINO directly and needs to -be converted to TensorFlow\* frozen model format first. +The original pre-trained model is provided in the hdf5 format which is not supported by OpenVINO directly and needs to be converted to TensorFlow\* frozen model format first. -1. Download repository `https://github.com/rmsouza01/Hybrid-CS-Model-MRI`:
```bash git clone https://github.com/rmsouza01/Hybrid-CS-Model-MRI git checkout 2ede2f96161ce70dcdc922371fe6b6b254aafcc8 @@ -231,15 +211,11 @@ model. The implementation of the Model Optimizer operation should be saved to `m The attribute `inverse` is a flag specifying type of the FFT to apply: forward or inverse. -See the "Model Optimizer Operation" section on the -[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the -detailed instruction on how to implement the operation. +See the "Model Optimizer Operation" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for detailed instructions on how to implement the operation. Now it is necessary to implement extractor for the "IFFT2D" operation according to the -"Operation Extractor" section on the -[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). The -following snippet provides two extractors: one for "IFFT2D", another one for "FFT2D", however only on of them is used -in this example. The implementation should be saved to the file `mo_extensions/front/tf/FFT_ext.py`. +"Operation Extractor" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). The +following snippet provides two extractors: one for "IFFT2D", another one for "FFT2D"; however, only one of them is used in this example. The implementation should be saved to the file `mo_extensions/front/tf/FFT_ext.py`. @snippet FFT_ext.py fft_ext:extractor @@ -255,8 +231,7 @@ consumed with the "Complex" operation to produce a tensor of complex numbers. Th operations can be removed so the "FFT" operation will get a real value tensor encoding complex numbers. To achieve this we implement the front phase transformation which searches for a pattern of two "StridedSlice" operations with specific attributes producing data to "Complex" operation and removes it from the graph. Refer to the -"Pattern-Defined Front Phase Transformations" section on the -[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for more +"Pattern-Defined Front Phase Transformations" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for more information on how this type of transformation works. The code snippet should be saved to the file `mo_extensions/front/tf/Complex.py`. @@ -284,7 +259,7 @@ Now it is possible to convert the model using the following command line: .//mo.py --input_model /wnet_20.pb -b 1 --extensions mo_extensions/ ``` -The sub-graph corresponding to the originally non-supported one is depicted on the image below: +The sub-graph corresponding to the originally non-supported one is depicted in the image below: ![Converted sub-graph](img/converted_subgraph.png) @@ -293,8 +268,7 @@ The sub-graph corresponding to the originally non-supported one is depicted on t ### Inference Engine Extension Implementation Now it is necessary to implement the extension for the CPU plugin with operation "FFT" introduced previously. The code -below is based on the template extension described on the -[Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md). +below is based on the template extension described in [Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md).
#### CMake Build File The first step is to create a CMake configuration file which builds the extension. The content of the "CMakeLists.txt" @@ -334,7 +308,7 @@ The last step is to create an extension library "extension.cpp" and "extension.h operation for the CPU plugin. The code of the library is described in the [Extension Library](../IE_DG/Extensibility_DG/Extension.md). ### Building and Running the Custom Extension -In order to build the extension run the following:
+To build the extension, run the following:
```bash mkdir build && cd build source /opt/intel/openvino_2021/bin/setupvars.sh diff --git a/docs/IE_DG/Bfloat16Inference.md b/docs/IE_DG/Bfloat16Inference.md index 136607af8ad..0461c6ee2b7 100644 --- a/docs/IE_DG/Bfloat16Inference.md +++ b/docs/IE_DG/Bfloat16Inference.md @@ -15,7 +15,7 @@ Preserving the exponent bits keeps BF16 to the same range as the FP32 (~1e-38 to Truncated mantissa leads to occasionally less precision, but according to [investigations](https://cloud.google.com/blog/products/ai-machine-learning/bfloat16-the-secret-to-high-performance-on-cloud-tpus), neural networks are more sensitive to the size of the exponent than the mantissa size. Also, in lots of models, precision is needed close to zero but not so much at the maximum range. Another useful feature of BF16 is possibility to encode INT8 in BF16 without loss of accuracy, because INT8 range completely fits in BF16 mantissa field. It reduces data flow in conversion from INT8 input image data to BF16 directly without intermediate representation in FP32, or in combination of [INT8 inference](Int8Inference.md) and BF16 layers. -See the [Intel's site](https://software.intel.com/sites/default/files/managed/40/8b/bf16-hardware-numerics-definition-white-paper.pdf) for more bfloat16 format details. +See the ["BFLOAT16 – Hardware Numerics Definition" white paper](https://software.intel.com/sites/default/files/managed/40/8b/bf16-hardware-numerics-definition-white-paper.pdf) for more bfloat16 format details. There are two ways to check if CPU device can support bfloat16 computations for models: 1. Query the instruction set via system `lscpu | grep avx512_bf16` or `cat /proc/cpuinfo | grep avx512_bf16`. diff --git a/docs/IE_DG/Deep_Learning_Inference_Engine_DevGuide.md b/docs/IE_DG/Deep_Learning_Inference_Engine_DevGuide.md index 89997e0f0ce..5fc2b3f9102 100644 --- a/docs/IE_DG/Deep_Learning_Inference_Engine_DevGuide.md +++ b/docs/IE_DG/Deep_Learning_Inference_Engine_DevGuide.md @@ -1,11 +1,10 @@ # Inference Engine Developer Guide {#openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide} -> **NOTE:** [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019). +> **NOTE:** [Intel® System Studio](https://software.intel.com/content/www/us/en/develop/tools/oneapi/commercial-base-iot.html) (click "Intel® System Studio Users" tab) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019). -This Guide provides an overview of the Inference Engine describing the typical workflow for performing -inference of a pre-trained and optimized deep learning model and a set of sample applications. +This Guide provides an overview of the Inference Engine describing the typical workflow for performing inference of a pre-trained and optimized deep learning model and a set of sample applications.
-> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_group_intel). +> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in runtime using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_group_intel). After you have used the Model Optimizer to create an Intermediate Representation (IR), use the Inference Engine to infer the result for a given input data. @@ -22,7 +21,7 @@ For complete API Reference, see the [Inference Engine API References](./api_refe Inference Engine uses a plugin architecture. Inference Engine plugin is a software component that contains complete implementation for inference on a certain Intel® hardware device: CPU, GPU, VPU, etc. Each plugin implements the unified API and provides additional hardware-specific APIs. ## Modules in the Inference Engine component -### Core Inference Engine Libraries ### +### Core Inference Engine Libraries Your application must link to the core Inference Engine libraries: * Linux* OS: @@ -39,7 +38,7 @@ This library contains the classes to: * Manipulate network information (InferenceEngine::CNNNetwork) * Execute and pass inputs and outputs (InferenceEngine::ExecutableNetwork and InferenceEngine::InferRequest) -### Plugin Libraries to Read a Network Object ### +### Plugin Libraries to Read a Network Object Starting from 2020.4 release, Inference Engine introduced a concept of `CNNNetwork` reader plugins. Such plugins can be automatically dynamically loaded by Inference Engine in runtime depending on file format: * Linux* OS: @@ -49,7 +48,7 @@ Starting from 2020.4 release, Inference Engine introduced a concept of `CNNNetwo - `inference_engine_ir_reader.dll` to read a network from IR - `inference_engine_onnx_reader.dll` to read a network from ONNX model format -### Device-Specific Plugin Libraries ### +### Device-Specific Plugin Libraries For each supported target device, Inference Engine provides a plugin — a DLL/shared library that contains complete implementation for inference on this particular device. The following plugins are available: diff --git a/docs/IE_DG/DynamicBatching.md b/docs/IE_DG/DynamicBatching.md index a05c218b619..67475f0b83f 100644 --- a/docs/IE_DG/DynamicBatching.md +++ b/docs/IE_DG/DynamicBatching.md @@ -1,7 +1,7 @@ Using Dynamic Batching {#openvino_docs_IE_DG_DynamicBatching} ====================== -Dynamic Batching feature allows you+ to dynamically change batch size for inference calls +Dynamic Batching feature allows you to dynamically change batch size for inference calls within preset batch size limit. This feature might be useful when batch size is unknown beforehand, and using extra large batch size is undesired or impossible due to resource limitations. 
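For illustration, here is a minimal sketch of how the Dynamic Batching feature described above is typically enabled through the plugin configuration. The model path, target device, and batch sizes are placeholders for this sketch, not values taken from this patch:

```cpp
#include <inference_engine.hpp>
#include <map>
#include <string>

int main() {
    InferenceEngine::Core ie;

    // "model.xml" is a placeholder for an IR produced by the Model Optimizer.
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml");

    // The batch size set on the network acts as the upper limit for dynamic batching.
    network.setBatchSize(8);

    // Enable dynamic batching for the device before loading the network.
    std::map<std::string, std::string> config = {
        {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED,
         InferenceEngine::PluginConfigParams::YES}};
    InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU", config);

    InferenceEngine::InferRequest request = exeNetwork.CreateInferRequest();

    // Process only 2 items of the batch for this particular request.
    request.SetBatch(2);
    request.Infer();
    return 0;
}
```

Note that the batch size passed to `SetBatch()` must not exceed the limit set on the network before loading.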
diff --git a/docs/IE_DG/Extensibility_DG/AddingNGraphOps.md b/docs/IE_DG/Extensibility_DG/AddingNGraphOps.md index e98edf7e8f0..d3b0714ea44 100644 --- a/docs/IE_DG/Extensibility_DG/AddingNGraphOps.md +++ b/docs/IE_DG/Extensibility_DG/AddingNGraphOps.md @@ -1,10 +1,10 @@ # Custom nGraph Operation {#openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps} -Inference Engine Extension API enables you to register operation sets (opsets) with custom nGraph operations to support models with operations which OpenVINO™ does not support out-of-the-box. +Inference Engine Extension API allows you to register operation sets (opsets) with custom nGraph operations to support models with operations which OpenVINO™ does not support out-of-the-box. ## Operation Class -To add your custom nGraph operation, create a new class that extends `ngraph::Op`, which is in turn derived from `ngraph::Node`, the base class for all graph operations in nGraph. Follow the steps below: +To add your custom nGraph operation, create a new class that extends `ngraph::Op`, which is in turn derived from `ngraph::Node`, the base class for all graph operations in nGraph. Follow the steps below to add a custom nGraph operation: 1. Add the `NGRAPH_RTTI_DECLARATION` and `NGRAPH_RTTI_DEFINITION` macros which define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future. diff --git a/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md b/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md index e0cdb7cc584..252d67df81f 100644 --- a/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md +++ b/docs/IE_DG/Extensibility_DG/Custom_ONNX_Ops.md @@ -46,7 +46,7 @@ Here, the `register_operator` function is called in the constructor of Extension The example below demonstrates how to unregister an operator from the destructor of Extension: @snippet template_extension/extension.cpp extension:dtor -> **NOTE**: It is mandatory to unregister a custom ONNX operator if it is defined in a dynamic shared library. +> **REQUIRED**: It is mandatory to unregister a custom ONNX operator if it is defined in a dynamic shared library. ## Requirements for Building with CMake diff --git a/docs/IE_DG/Extensibility_DG/Intro.md b/docs/IE_DG/Extensibility_DG/Intro.md index 52546204341..aa2e7d87ba3 100644 --- a/docs/IE_DG/Extensibility_DG/Intro.md +++ b/docs/IE_DG/Extensibility_DG/Intro.md @@ -21,16 +21,14 @@ Inference Engine Extension dynamic library contains the following components: - Enables the creation of `ngraph::Function` with unsupported operations. - Provides a shape inference mechanism for custom operations. -> **NOTE**: This documentation is written based on the `Template extension`, which demonstrates extension -development details. Find the complete code of the `Template extension`, which is fully compilable and up-to-date, -at `/docs/template_extension`. +> **NOTE**: This documentation is written based on the `Template extension`, which demonstrates extension development details. Find the complete code of the `Template extension`, which is fully compilable and up-to-date, at `/docs/template_extension`. ## Execution Kernels The Inference Engine workflow involves the creation of custom kernels and either custom or existing operations. -An _Operation_ is a network building block implemented in the training framework, for example, `Convolution` in Caffe*. 
-A _Kernel_ is defined as the corresponding implementation in the Inference Engine. +An _operation_ is a network building block implemented in the training framework, for example, `Convolution` in Caffe*. +A _kernel_ is defined as the corresponding implementation in the Inference Engine. Refer to the [Model Optimizer Extensibility](../../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for details on how a mapping between framework operations and Inference Engine kernels is registered. diff --git a/docs/IE_DG/Extensibility_DG/VPU_Kernel.md b/docs/IE_DG/Extensibility_DG/VPU_Kernel.md index ec102b1b51c..03309759831 100644 --- a/docs/IE_DG/Extensibility_DG/VPU_Kernel.md +++ b/docs/IE_DG/Extensibility_DG/VPU_Kernel.md @@ -17,8 +17,7 @@ OpenCL support is provided by ComputeAorta*, and is distributed under a license The OpenCL toolchain for the Intel® Neural Compute Stick 2 supports offline compilation only, so first compile OpenCL C code using the standalone `clc` compiler. You can find the compiler binary at `/deployment_tools/tools/cl_compiler`. -> **NOTE:** By design, custom OpenCL layers support any OpenCL kernels written with 1.2 version assumed. It also supports half float -extension and is optimized for this type, because it is a native type for Intel® Movidius™ VPUs. +> **NOTE:** By design, custom OpenCL layers support any OpenCL kernels written with 1.2 version assumed. It also supports half float extension and is optimized for this type, because it is a native type for Intel® Movidius™ VPUs. 1. Prior to running a compilation, make sure that the following variables are set: * `SHAVE_MA2X8XLIBS_DIR=/deployment_tools/tools/cl_compiler/lib/` @@ -224,7 +223,7 @@ Here is a short list of optimization tips: annotate the code with pragmas as appropriate. The `ocl_grn` version with `#‍pragma unroll 4` is up to 50% faster, most of which comes from unrolling the first loop, because LLVM, in general, is better in scheduling 3-stage loops (load-compute-store), while the fist loop `variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);` is only 2-stage (load-compute). Pay attention to unrolling such cases first. Unrolling factor is loop-dependent. Choose the smallest number that -still improves performance as an optimum between the kernel size and execution speed. For this specific kernel, changing the unroll factor from `4`to `6` results in the same performance, so unrolling factor equal to 4 is an optimum. For Intel® Neural Compute Stick 2, unrolling is conjugated with the automatic software pipelining for load, store, and compute stages: +still improves performance as an optimum between the kernel size and execution speed. For this specific kernel, changing the unroll factor from `4` to `6` results in the same performance, so unrolling factor equal to 4 is an optimum. For Intel® Neural Compute Stick 2, unrolling is conjugated with the automatic software pipelining for load, store, and compute stages: ```cpp __kernel void ocl_grn(__global const half* restrict src_data, __global half* restrict dst_data, int C, float bias) { diff --git a/docs/IE_DG/GPU_Kernels_Tuning.md b/docs/IE_DG/GPU_Kernels_Tuning.md index 4bbe315e42c..5bb6a8334b2 100644 --- a/docs/IE_DG/GPU_Kernels_Tuning.md +++ b/docs/IE_DG/GPU_Kernels_Tuning.md @@ -10,11 +10,10 @@ tuning for new kind of models, hardwares or drivers. ## Tuned data -GPU tuning data is saved in JSON format. -File's content is composed of 2 types of attributes and 1 type of value: -1. 
Execution units number - this attribute splits the content into different EU sections. -2. Hash - hashed tuned kernel data. -Key: Array with kernel name and kernel's mode index. +GPU tuning data is saved in JSON format. The file is composed of 2 types of attributes and 1 type of value: +* Execution units number (attribute): splits the content into different EU sections +* Hash (attribute): hashed tuned kernel data +* Key (value): Array with kernel name and kernel's mode index ## Usage diff --git a/docs/IE_DG/InferenceEngine_QueryAPI.md b/docs/IE_DG/InferenceEngine_QueryAPI.md index 60497bbebdf..8588e00e5ce 100644 --- a/docs/IE_DG/InferenceEngine_QueryAPI.md +++ b/docs/IE_DG/InferenceEngine_QueryAPI.md @@ -57,7 +57,7 @@ For documentation about common configuration keys, refer to `ie_plugin_config.hp @snippet snippets/InferenceEngine_QueryAPI2.cpp part2 -A returned value looks as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`. +A returned value appears as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`. > **NOTE**: All metrics have specific type, which is specified during metric instantiation. The list of common device-agnostic metrics can be found in `ie_plugin_config.hpp`. Device specific metrics (for example, for `HDDL`, `MYRIAD` devices) can be found in corresponding plugin folders. diff --git a/docs/IE_DG/Int8Inference.md b/docs/IE_DG/Int8Inference.md index 1f580bbd4e2..917c7836de2 100644 --- a/docs/IE_DG/Int8Inference.md +++ b/docs/IE_DG/Int8Inference.md @@ -27,7 +27,7 @@ Let's explore quantized [TensorFlow* implementation of ResNet-50](https://github ```sh ./downloader.py --name resnet-50-tf --precisions FP16-INT8 ``` -After that you should quantize model by [Model Quantizer](@ref omz_tools_downloader) tool. +After that you should quantize model by the [Model Quantizer](@ref omz_tools_downloader) tool. ```sh ./quantizer.py --model_dir public/resnet-50-tf --dataset_dir --precisions=FP16-INT8 ``` @@ -35,7 +35,7 @@ The simplest way to infer the model and collect performance counters is [C++ Ben ```sh ./benchmark_app -m resnet-50-tf.xml -d CPU -niter 1 -api sync -report_type average_counters -report_folder pc_report_dir ``` -If you infer the model in the OpenVINO™ CPU plugin and collect performance counters, all operations (except last not quantized SoftMax) are executed in INT8 precision. +If you infer the model with the OpenVINO™ CPU plugin and collect performance counters, all operations (except last not quantized SoftMax) are executed in INT8 precision. ## Low-Precision 8-bit Integer Inference Workflow @@ -46,7 +46,7 @@ For 8-bit integer computations, a model must be quantized. Quantized models can When you pass the quantized IR to the OpenVINO™ plugin, the plugin automatically recognizes it as a quantized model and performs 8-bit inference. Note, if you pass a quantized model to another plugin that does not support 8-bit inference but supports all operations from the model, the model is inferred in precision that this plugin supports. -2. *Run-time stage*. This stage is an internal procedure of the OpenVINO™ plugin. During this stage, the quantized model is loaded to the plugin. The plugin uses `Low Precision Transformation` component to update the model to infer it in low precision: +2. *Runtime stage*. This stage is an internal procedure of the OpenVINO™ plugin. During this stage, the quantized model is loaded to the plugin. 
The plugin uses `Low Precision Transformation` component to update the model to infer it in low precision: - Update `FakeQuantize` layers to have quantized output tensors in low precision range and add dequantization layers to compensate the update. Dequantization layers are pushed through as many layers as possible to have more layers in low precision. After that, most layers have quantized input tensors in low precision range and can be inferred in low precision. Ideally, dequantization layers should be fused in the next `FakeQuantize` layer. - Weights are quantized and stored in `Constant` layers. diff --git a/docs/IE_DG/Integrate_with_customer_application_new_API.md b/docs/IE_DG/Integrate_with_customer_application_new_API.md index 27cc6b7e32e..9e35f483717 100644 --- a/docs/IE_DG/Integrate_with_customer_application_new_API.md +++ b/docs/IE_DG/Integrate_with_customer_application_new_API.md @@ -105,34 +105,21 @@ methods: @snippet snippets/Integrate_with_customer_application_new_API.cpp part7 6) **Prepare input**. You can use one of the following options to prepare input: - * **Optimal way for a single network.** Get blobs allocated by an infer request using `InferenceEngine::InferRequest::GetBlob()` - and feed an image and the input data to the blobs. In this case, input data must be aligned (resized manually) with a - given blob size and have a correct color format. + * **Optimal way for a single network.** Get blobs allocated by an infer request using `InferenceEngine::InferRequest::GetBlob()` and feed an image and the input data to the blobs. In this case, input data must be aligned (resized manually) with a given blob size and have a correct color format. @snippet snippets/Integrate_with_customer_application_new_API.cpp part8 - * **Optimal way for a cascade of networks (output of one network is input for another).** Get output blob from the first - request using `InferenceEngine::InferRequest::GetBlob()` and set it as input for the second request using - `InferenceEngine::InferRequest::SetBlob()`. + * **Optimal way for a cascade of networks (output of one network is input for another).** Get output blob from the first request using `InferenceEngine::InferRequest::GetBlob()` and set it as input for the second request using `InferenceEngine::InferRequest::SetBlob()`. @snippet snippets/Integrate_with_customer_application_new_API.cpp part9 - * **Optimal way to handle ROI (a ROI object located inside of input of one network is input for another).** It is - possible to re-use shared input by several networks. You do not need to allocate separate input blob for a network if - it processes a ROI object located inside of already allocated input of a previous network. For instance, when first - network detects objects on a video frame (stored as input blob) and second network accepts detected bounding boxes - (ROI inside of the frame) as input. - In this case, it is allowed to re-use pre-allocated input blob (used by first network) by second network and just crop - ROI without allocation of new memory using `InferenceEngine::make_shared_blob()` with passing of - `InferenceEngine::Blob::Ptr` and `InferenceEngine::ROI` as parameters. + * **Optimal way to handle ROI (a ROI object located inside of input of one network is input for another).** It is possible to re-use shared input by several networks. You do not need to allocate separate input blob for a network if it processes a ROI object located inside of already allocated input of a previous network. 
For instance, when first network detects objects on a video frame (stored as input blob) and second network accepts detected bounding boxes (ROI inside of the frame) as input. In this case, it is allowed to re-use pre-allocated input blob (used by first network) by second network and just crop ROI without allocation of new memory using `InferenceEngine::make_shared_blob()` with passing of `InferenceEngine::Blob::Ptr` and `InferenceEngine::ROI` as parameters. @snippet snippets/Integrate_with_customer_application_new_API.cpp part10 - Make sure that shared input is kept valid during execution of each network. Otherwise, ROI blob may be corrupted if the - original input blob (that ROI is cropped from) has already been rewritten. +Make sure that shared input is kept valid during execution of each network. Otherwise, ROI blob may be corrupted if the original input blob (that ROI is cropped from) has already been rewritten. - * Allocate input blobs of the appropriate types and sizes, feed an image and the input data to the blobs, and call - `InferenceEngine::InferRequest::SetBlob()` to set these blobs for an infer request: + * Allocate input blobs of the appropriate types and sizes, feed an image and the input data to the blobs, and call `InferenceEngine::InferRequest::SetBlob()` to set these blobs for an infer request: @snippet snippets/Integrate_with_customer_application_new_API.cpp part11 @@ -140,7 +127,7 @@ methods: > **NOTE:** > -> * `SetBlob()` method compares precision and layout of an input blob with ones defined on step 3 and +> * The `SetBlob()` method compares precision and layout of an input blob with the ones defined in step 3 and > throws an exception if they do not match. It also compares a size of the input blob with input > size of the read network. But if input was configured as resizable, you can set an input blob of > any size (for example, any ROI blob). Input resize will be invoked automatically using resize @@ -154,8 +141,7 @@ methods: > corresponding values of the read network. No pre-processing will happen for this blob. If you > call `GetBlob()` after `SetBlob()`, you will get the blob you set in `SetBlob()`. -7) **Do inference** by calling the `InferenceEngine::InferRequest::StartAsync` and `InferenceEngine::InferRequest::Wait` -methods for asynchronous request: +7) **Do inference** by calling the `InferenceEngine::InferRequest::StartAsync` and `InferenceEngine::InferRequest::Wait` methods for asynchronous request: @snippet snippets/Integrate_with_customer_application_new_API.cpp part12 @@ -164,12 +150,10 @@ or by calling the `InferenceEngine::InferRequest::Infer` method for synchronous @snippet snippets/Integrate_with_customer_application_new_API.cpp part13 `StartAsync` returns immediately and starts inference without blocking main thread, `Infer` blocks - main thread and returns when inference is completed. -Call `Wait` for waiting result to become available for asynchronous request. + main thread and returns when inference is completed. Call `Wait` for waiting result to become available for asynchronous request. There are three ways to use it: -* specify maximum duration in milliseconds to block for. The method is blocked until the specified timeout has elapsed, -or the result becomes available, whichever comes first. +* specify maximum duration in milliseconds to block for. The method is blocked until the specified timeout has elapsed, or the result becomes available, whichever comes first. 
* `InferenceEngine::InferRequest::WaitMode::RESULT_READY` - waits until inference result becomes available * `InferenceEngine::InferRequest::WaitMode::STATUS_ONLY` - immediately returns request status. It does not block or interrupt the current thread. @@ -182,8 +166,7 @@ While request is ongoing, all its methods except `InferenceEngine::InferRequest: exception. 8) Go over the output blobs and **process the results**. -Note that casting `Blob` to `TBlob` via `std::dynamic_pointer_cast` is not recommended way, -better to access data via `buffer()` and `as()` methods as follows: +Note that casting `Blob` to `TBlob` via `std::dynamic_pointer_cast` is not the recommended way. It's better to access data via the `buffer()` and `as()` methods as follows: @snippet snippets/Integrate_with_customer_application_new_API.cpp part14 @@ -217,7 +200,7 @@ add_executable(${PROJECT_NAME} src/main.cpp) target_link_libraries(${PROJECT_NAME} PRIVATE ${InferenceEngine_LIBRARIES} ${OpenCV_LIBS} ${NGRAPH_LIBRARIES}) ``` 3. **To build your project** using CMake with the default build tools currently available on your machine, execute the following commands: -> **NOTE**: Make sure **Set the Environment Variables** step in [OpenVINO Installation](../../inference-engine/samples/hello_nv12_input_classification/README.md) document is applied to your terminal, otherwise `InferenceEngine_DIR` and `OpenCV_DIR` variables won't be configured properly to pass `find_package` calls. +> **NOTE**: Make sure you set environment variables first by running `/bin/setupvars.sh` (or `setupvars.bat` for Windows). Otherwise the `InferenceEngine_DIR` and `OpenCV_DIR` variables won't be configured properly to pass `find_package` calls. ```sh cd build/ cmake ../project diff --git a/docs/IE_DG/Intro_to_Performance.md b/docs/IE_DG/Intro_to_Performance.md index 6dbdd35cef4..66fcf48c34f 100644 --- a/docs/IE_DG/Intro_to_Performance.md +++ b/docs/IE_DG/Intro_to_Performance.md @@ -14,7 +14,7 @@ You can find more information, including preferred data types for specific devic ## Lowering Inference Precision Default optimization is used for CPU and implies that inference is made with lower precision if it is possible on a given platform to reach better performance with acceptable range of accuracy. -This approach is used for CPU device if platform supports the AVX512_BF16 instruction. In this case, a regular float32 model is converted to [bfloat16](Bfloat16Inference.md) internal representation and inference is provided with bfloat16 layers usage. +This approach can be used for CPU devices where the platform supports the AVX512_BF16 instruction. In this case, a regular float32 model is converted to [bfloat16](Bfloat16Inference.md) internal representation and inference is provided with bfloat16 layers usage. Below is the example command line to disable this feature on the CPU device with the AVX512_BF16 instruction and execute regular float32. ``` $ benchmark_app -m -enforcebf16=false ``` diff --git a/docs/IE_DG/Introduction.md b/docs/IE_DG/Introduction.md index 6d3d5be66c6..1682ec7466e 100644 --- a/docs/IE_DG/Introduction.md +++ b/docs/IE_DG/Introduction.md @@ -92,7 +92,7 @@ Refer to a dedicated description about [Intermediate Representation and Operatio ## nGraph Integration OpenVINO toolkit is powered by nGraph capabilities for Graph construction API, Graph transformation engine and Reshape. -nGraph Function is used as an intermediate representation for a model in the run-time underneath the CNNNetwork API.
+nGraph Function is used as an intermediate representation for a model in the runtime underneath the CNNNetwork API. The conventional representation for CNNNetwork is still available if requested for backward compatibility when some conventional API methods are used. Please refer to the [Overview of nGraph](../nGraph_DG/nGraph_dg.md) describing the details of nGraph representation. diff --git a/docs/IE_DG/Memory_primitives.md b/docs/IE_DG/Memory_primitives.md index a6fed433d3c..507757ee650 100644 --- a/docs/IE_DG/Memory_primitives.md +++ b/docs/IE_DG/Memory_primitives.md @@ -8,7 +8,7 @@ Using this class you can read and write memory, get information about the memory The right way to create Blob objects with a specific layout is to use constructors with InferenceEngine::TensorDesc.
-InferenceEngige::TensorDesc tdesc(FP32, {1, 3, 227, 227}, InferenceEngine::Layout::NCHW);
+InferenceEngine::TensorDesc tdesc(FP32, {1, 3, 227, 227}, InferenceEngine::Layout::NCHW);
 InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob(tdesc);
 
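In the rendered snippet above, the template argument of `make_shared_blob` appears to have been lost to documentation markup stripping. Below is a self-contained sketch of the same idea, assuming FP32 (`float`) data:

```cpp
#include <inference_engine.hpp>

int main() {
    // Describe a 4D NCHW tensor of 32-bit floats (batch 1, 3 channels, 227x227).
    InferenceEngine::TensorDesc tdesc(InferenceEngine::Precision::FP32,
                                      {1, 3, 227, 227},
                                      InferenceEngine::Layout::NCHW);

    // Create a typed blob with that layout and allocate its memory.
    InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob<float>(tdesc);
    blob->allocate();
    return 0;
}
```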
diff --git a/docs/IE_DG/ONNX_Support.md b/docs/IE_DG/ONNX_Support.md index 80afe82df44..5b85b9185f0 100644 --- a/docs/IE_DG/ONNX_Support.md +++ b/docs/IE_DG/ONNX_Support.md @@ -40,8 +40,8 @@ The described mechanism is the only possibility to read weights from external fi * `const std::string& binPath` * `const Blob::CPtr& weights` -You can find more details about external data mechanism in [ONNX documentation](https://github.com/onnx/onnx/blob/master/docs/ExternalData.md). -To convert a model to use external data feature, you can use [ONNX helpers functions](https://github.com/onnx/onnx/blob/master/onnx/external_data_helper.py). +You can find more details about the external data mechanism in [ONNX documentation](https://github.com/onnx/onnx/blob/master/docs/ExternalData.md). +To convert a model to use the external data feature, you can use [ONNX helper functions](https://github.com/onnx/onnx/blob/master/onnx/external_data_helper.py). **Unsupported types of tensors:** diff --git a/docs/IE_DG/ShapeInference.md b/docs/IE_DG/ShapeInference.md index 93b27c621b5..dcc4b5c3f88 100644 --- a/docs/IE_DG/ShapeInference.md +++ b/docs/IE_DG/ShapeInference.md @@ -34,7 +34,7 @@ If a model has a hard-coded batch dimension, use `InferenceEngine::CNNNetwork::s Inference Engine takes three kinds of a model description as an input, which are converted into an `InferenceEngine::CNNNetwork` object: 1. [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md) through `InferenceEngine::Core::ReadNetwork` 2. [ONNX model](../IE_DG/OnnxImporterTutorial.md) through `InferenceEngine::Core::ReadNetwork` -3. [nGraph::Function](../nGraph_DG/nGraph_dg.md) through the constructor of `InferenceEngine::CNNNetwork` +3. [nGraph function](../nGraph_DG/nGraph_dg.md) through the constructor of `InferenceEngine::CNNNetwork` `InferenceEngine::CNNNetwork` keeps an `ngraph::Function` object with the model description internally. The object should have fully defined input shapes to be successfully loaded to the Inference Engine plugins. diff --git a/docs/IE_DG/inference_engine_intro.md b/docs/IE_DG/inference_engine_intro.md index 717813cdf76..847c0a59e35 100644 --- a/docs/IE_DG/inference_engine_intro.md +++ b/docs/IE_DG/inference_engine_intro.md @@ -100,18 +100,18 @@ The common workflow contains the following steps: 3. **Prepare inputs and outputs format** - After loading the network, specify input and output precision and the layout on the network. For these specification, use the `InferenceEngine::CNNNetwork::getInputsInfo()` and `InferenceEngine::CNNNetwork::getOutputsInfo()`. -4. Pass per device loading configurations specific to this device (`InferenceEngine::Core::SetConfig`), and register extensions to this device (`InferenceEngine::Core::AddExtension`). +4. **Pass per device loading configurations** specific to this device (`InferenceEngine::Core::SetConfig`) and register extensions to this device (`InferenceEngine::Core::AddExtension`). -4. **Compile and Load Network to device** - Use the `InferenceEngine::Core::LoadNetwork()` method with specific device (e.g. `CPU`, `GPU`, etc.) to compile and load the network on the device. Pass in the per-target load configuration for this compilation and load operation. +5. **Compile and Load Network to device** - Use the `InferenceEngine::Core::LoadNetwork()` method with specific device (e.g. `CPU`, `GPU`, etc.) to compile and load the network on the device. Pass in the per-target load configuration for this compilation and load operation. -5. 
**Set input data** - With the network loaded, you have an `InferenceEngine::ExecutableNetwork` object. Use this object to create an `InferenceEngine::InferRequest` in which you signal the input buffers to use for input and output. Specify a device-allocated memory and copy it into the device memory directly, or tell the device to use your application memory to save a copy. +6. **Set input data** - With the network loaded, you have an `InferenceEngine::ExecutableNetwork` object. Use this object to create an `InferenceEngine::InferRequest` in which you signal the input buffers to use for input and output. Specify a device-allocated memory and copy it into the device memory directly, or tell the device to use your application memory to save a copy. -6. **Execute** - With the input and output memory now defined, choose your execution mode: +7. **Execute** - With the input and output memory now defined, choose your execution mode: * Synchronously - `InferenceEngine::InferRequest::Infer()` method. Blocks until inference is completed. * Asynchronously - `InferenceEngine::InferRequest::StartAsync()` method. Check status with the `InferenceEngine::InferRequest::Wait()` method (0 timeout), wait, or specify a completion callback. -7. **Get the output** - After inference is completed, get the output memory or read the memory you provided earlier. Do this with the `InferenceEngine::InferRequest::GetBlob()` method. +8. **Get the output** - After inference is completed, get the output memory or read the memory you provided earlier. Do this with the `InferenceEngine::IInferRequest::GetBlob()` method. Further Reading diff --git a/docs/IE_DG/network_state_intro.md b/docs/IE_DG/network_state_intro.md index e55b081a9dd..778cc2c29b3 100644 --- a/docs/IE_DG/network_state_intro.md +++ b/docs/IE_DG/network_state_intro.md @@ -7,7 +7,7 @@ This section describes how to work with stateful networks in OpenVINO toolkit, s The section additionally provides small examples of stateful network and code to infer it. -## What is a stateful network +## What is a Stateful Network Several use cases require processing of data sequences. When length of a sequence is known and small enough, we can process it with RNN like networks that contain a cycle inside. But in some cases, like online speech recognition of time series @@ -21,7 +21,7 @@ The section additionally provides small examples of stateful network and code to OpenVINO also contains special API to simplify work with networks with states. State is automatically saved between inferences, and there is a way to reset state when needed. You can also read state or set it to some new value between inferences. -## OpenVINO state representation +## OpenVINO State Representation OpenVINO contains a special abstraction `Variable` to represent a state in a network. There are two operations to work with the state: * `Assign` to save value in state @@ -30,14 +30,13 @@ The section additionally provides small examples of stateful network and code to You can find more details on these operations in [ReadValue specification](../ops/infrastructure/ReadValue_3.md) and [Assign specification](../ops/infrastructure/Assign_3.md). -## Examples of representation of a network with states +## Examples of Representation of a Network with States + +To get a model with states ready for inference, you can convert a model from another framework to IR with Model Optimizer or create an nGraph function (details can be found in [Build nGraph Function section](../nGraph_DG/build_function.md)). 
Let's represent the following graph in both forms: -To get a model with states ready for inference, you can convert a model from another framework to IR with Model Optimizer or create an nGraph function -(details can be found in [Build nGraph Function section](../nGraph_DG/build_function.md)). -Let's represent the following graph in both forms: ![state_network_example] -### Example of IR with state +### Example of IR with State The `bin` file for this graph should contain float 0 in binary form. Content of `xml` is the following. @@ -150,7 +149,7 @@ The `bin` file for this graph should contain float 0 in binary form. Content of ``` -### Example of creating model nGraph API +### Example of Creating Model nGraph API ```cpp #include @@ -182,8 +181,7 @@ sink from `ngraph::Function` after deleting the node from graph with the `delete ## OpenVINO state API - Inference Engine has the `InferRequest::QueryState` method to get the list of states from a network and `IVariableState` interface to operate with states. Below you can find brief description of methods and the workable example of how to use this interface. - is below and next section contains small workable example how this interface can be used. + Inference Engine has the `InferRequest::QueryState` method to get the list of states from a network and `IVariableState` interface to operate with states. Below you can find brief description of methods and the workable example of how to use this interface. * `std::string GetName() const` returns name(variable_id) of according Variable @@ -194,7 +192,7 @@ sink from `ngraph::Function` after deleting the node from graph with the `delete * `Blob::CPtr GetState() const` returns current value of state -## Example of stateful network inference +## Example of Stateful Network Inference Let's take an IR from the previous section example. The example below demonstrates inference of two independent sequences of data. State should be reset between these sequences. @@ -211,7 +209,7 @@ Decsriptions can be found in [Samples Overview](./Samples_Overview.md) [state_network_example]: ./img/state_network_example.png -## LowLatency transformation +## LowLatency Transformation If the original framework does not have a special API for working with states, after importing the model, OpenVINO representation will not contain Assign/ReadValue layers. For example, if the original ONNX model contains RNN operations, IR will contain TensorIterator operations and the values will be obtained only after the execution of whole TensorIterator primitive, intermediate values from each iteration will not be available. To be able to work with these intermediate values of each iteration and receive them with a low latency after each infer request, a special LowLatency transformation was introduced. @@ -221,15 +219,14 @@ LowLatency transformation changes the structure of the network containing [Tenso After applying the transformation, ReadValue operations can receive other operations as an input, as shown in the picture above. These inputs should set the initial value for initialization of ReadValue operations. However, such initialization is not supported in the current State API implementation. Input values are ignored and the initial values for the ReadValue operations are set to zeros unless otherwise specified by the user via [State API](#openvino-state-api). -### Steps to apply LowLatency transformation +### Steps to apply LowLatency Transformation -1. Get CNNNetwork. Any way is acceptable: +1. Get CNNNetwork. 
Either way is acceptable: - * [from IR or ONNX model](Integrate_with_customer_application_new_API.md#integration-steps) + * [from IR or ONNX model](./Integrate_with_customer_application_new_API.md) * [from nGraph Function](../nGraph_DG/build_function.md) -2. [Reshape](ShapeInference) CNNNetwork network if necessary -**Necessary case:** the sequence_lengths dimension of input > 1, it means the TensorIterator layer will have number_iterations > 1. We should reshape the inputs of the network to set sequence_dimension exactly to 1. +2. [Reshape](ShapeInference.md) the CNNNetwork if necessary. **Necessary case:** if the sequence_lengths dimension of an input is greater than 1, the TensorIterator layer will have number_iterations > 1. In this case, reshape the network inputs to set sequence_dimension to exactly 1. Usually, the following exception, which occurs after applying a transform when trying to infer the network in a plugin, indicates the need to apply reshape feature: `C++ exception with description "Function is incorrect. Assign and ReadValue operations must be used in pairs in the network."` This means that there are several pairs of Assign/ReadValue operations with the same variable_id in the network, operations were inserted into each iteration of the TensorIterator. @@ -280,7 +277,7 @@ InferenceEngine::LowLatency(cnnNetwork); 4. Use state API. See sections [OpenVINO state API](#openvino-state-api), [Example of stateful network inference](#example-of-stateful-network-inference). -### Known limitations +### Known Limitations 1. Parameters connected directly to ReadValues (States) after the transformation is applied are not allowed. Unnecessary parameters may remain on the graph after applying the transformation. The automatic handling of this case inside the transformation is not possible now. Such Parameters should be removed manually from `ngraph::Function` or replaced with a Constant. diff --git a/docs/IE_DG/supported_plugins/GNA.md b/docs/IE_DG/supported_plugins/GNA.md index f4729757184..7503766fa42 100644 --- a/docs/IE_DG/supported_plugins/GNA.md +++ b/docs/IE_DG/supported_plugins/GNA.md @@ -73,7 +73,7 @@ Limitations include: #### Experimental Support for 2D Convolutions -The Intel® GNA hardware natively supports only 1D convolution. +The Intel® GNA hardware natively supports only 1D convolutions. However, 2D convolutions can be mapped to 1D when a convolution kernel moves in a single direction. GNA Plugin performs such a transformation for Kaldi `nnet1` convolution. From this perspective, the Intel® GNA hardware convolution operation accepts an `NHWC` input and produces an `NHWC` output. Because OpenVINO™ only supports the `NCHW` layout, you may need to insert `Permute` layers before or after convolutions.
diff --git a/docs/IE_DG/supported_plugins/GPU_RemoteBlob_API.md b/docs/IE_DG/supported_plugins/GPU_RemoteBlob_API.md index 227ce101723..c24faa3541f 100644 --- a/docs/IE_DG/supported_plugins/GPU_RemoteBlob_API.md +++ b/docs/IE_DG/supported_plugins/GPU_RemoteBlob_API.md @@ -44,8 +44,8 @@ To request the internal context of the given `ExecutableNetwork`, use the `GetCo ## Shared Blob User-Side Wrappers -The classes that implement the `RemoteBlob` interface both are wrappers for native API -memory handles (which can be obtained from them at any moment) and act just like regular OpenVINO™ +The classes that implement the `RemoteBlob` interface are both wrappers for native API +memory handles (which can be obtained from them at any time) and act just like regular OpenVINO™ `Blob` objects. Once you obtain the context, you can use it to compile a new `ExecutableNetwork` or create `RemoteBlob` diff --git a/docs/IE_DG/supported_plugins/HDDL.md b/docs/IE_DG/supported_plugins/HDDL.md index 9154f1d3f30..5108e303594 100644 --- a/docs/IE_DG/supported_plugins/HDDL.md +++ b/docs/IE_DG/supported_plugins/HDDL.md @@ -1,16 +1,12 @@ # HDDL Plugin {#openvino_docs_IE_DG_supported_plugins_HDDL} -## Introducing HDDL Plugin +## Introducing the HDDL Plugin -The Inference Engine HDDL plugin is developed for inference of neural networks on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs which is designed for use cases those require large throughput of deep learning inference. It provides dozens amount of throughput as MYRIAD Plugin. +The Inference Engine HDDL plugin was developed for inference with neural networks on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. It is designed for use cases that require large throughput for deep learning inference, up to dozens of times more than the MYRIAD Plugin. -## Installation on Linux* OS +## Configuring the HDDL Plugin -For installation instructions, refer to the [Installation Guide for Linux\*](VPU.md). - -## Installation on Windows* OS - -For installation instructions, refer to the [Installation Guide for Windows\*](Supported_Devices.md). +To configure your Intel® Vision Accelerator Design With Intel® Movidius™ on supported OSs, refer to the Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs section in the installation guides for [Linux](../../install_guides/installing-openvino-linux.md) or [Windows](../../install_guides/installing-openvino-windows.md). ## Supported networks diff --git a/docs/IE_DG/supported_plugins/HETERO.md b/docs/IE_DG/supported_plugins/HETERO.md index 9b5f69ce687..f2b7521457e 100644 --- a/docs/IE_DG/supported_plugins/HETERO.md +++ b/docs/IE_DG/supported_plugins/HETERO.md @@ -1,12 +1,12 @@ Heterogeneous Plugin {#openvino_docs_IE_DG_supported_plugins_HETERO} ======= -## Introducing Heterogeneous Plugin +## Introducing the Heterogeneous Plugin The heterogeneous plugin enables computing for inference on one network on several devices. 
-Purposes to execute networks in heterogeneous mode -* To utilize accelerators power and calculate heaviest parts of network on accelerator and execute not supported layers on fallback devices like CPU -* To utilize all available hardware more efficiently during one inference +The purposes of executing networks in heterogeneous mode: +* Utilize the power of accelerators to calculate heaviest parts of the network and execute unsupported layers on fallback devices like the CPU +* Utilize all available hardware more efficiently during one inference The execution through heterogeneous plugin can be divided to two independent steps: * Setting of affinity to layers @@ -14,14 +14,13 @@ The execution through heterogeneous plugin can be divided to two independent ste These steps are decoupled. The setting of affinity can be done automatically using fallback policy or in manual mode. -The fallback automatic policy means greedy behavior and assigns all layers which can be executed on certain device on that device follow priorities. -Automatic policy does not take into account such plugin peculiarities as inability to infer some layers without other special layers placed before of after that layers. It is plugin responsibility to solve such cases. If device plugin does not support subgraph topology constructed by Hetero plugin affinity should be set manually. +The fallback automatic policy causes "greedy" behavior and assigns all layers that can be executed on certain device according to the priorities you specify (for example, `HETERO:GPU,CPU`). +Automatic policy does not take into account plugin peculiarities such as the inability to infer some layers without other special layers placed before or after that layer. The plugin is responsible for solving such cases. If the device plugin does not support the subgraph topology constructed by the Hetero plugin, then you should set affinity manually. Some of the topologies are not friendly to heterogeneous execution on some devices or cannot be executed in such mode at all. -Example of such networks might be networks having activation layers which are not supported on primary device. -If transmitting of data from one part of network to another part in heterogeneous mode takes relatively much time, -then it is not much sense to execute them in heterogeneous mode on these devices. -In this case you can define heaviest part manually and set affinity thus way to avoid sending of data back and forth many times during one inference. +Examples of such networks are networks having activation layers which are not supported on primary device. +If transmitting data from one part of a network to another part in heterogeneous mode takes more time than in normal mode, it may not make sense to execute them in heterogeneous mode. +In this case, you can define heaviest part manually and set the affinity to avoid sending data back and forth many times during one inference. ## Annotation of Layers per Device and Default Fallback Policy Default fallback policy decides which layer goes to which device automatically according to the support in dedicated plugins (FPGA, GPU, CPU, MYRIAD). 
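As a minimal sketch of the automatic fallback policy described above (assuming a placeholder `model.xml` and a GPU-then-CPU priority list), the heterogeneous plugin is selected simply by using a `HETERO:` device name, and `QueryNetwork` can be used to preview the resulting per-layer assignment:

```cpp
#include <inference_engine.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core ie;

    // "model.xml" is a placeholder for an IR produced by the Model Optimizer.
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml");

    // Automatic fallback: layers supported by the GPU plugin run on the GPU,
    // everything else falls back to the CPU plugin.
    InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "HETERO:GPU,CPU");

    // Optionally, check in advance which device each layer would be assigned to.
    InferenceEngine::QueryNetworkResult result = ie.QueryNetwork(network, "HETERO:GPU,CPU");
    for (const auto& layer : result.supportedLayersMap) {
        std::cout << layer.first << " -> " << layer.second << std::endl;
    }
    return 0;
}
```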
diff --git a/docs/IE_DG/supported_plugins/MULTI.md b/docs/IE_DG/supported_plugins/MULTI.md index f20443ca4c2..ac161db9147 100644 --- a/docs/IE_DG/supported_plugins/MULTI.md +++ b/docs/IE_DG/supported_plugins/MULTI.md @@ -1,48 +1,44 @@ # Multi-Device Plugin {#openvino_docs_IE_DG_supported_plugins_MULTI} -## Introducing Multi-Device Execution +## Introducing the Multi-Device Plugin -Multi-Device plugin automatically assigns inference requests to available computational devices to execute the requests in parallel. -Potential gains are as follows +The Multi-Device plugin automatically assigns inference requests to available computational devices to execute the requests in parallel. Potential gains are as follows: * Improved throughput that multiple devices can deliver (compared to single-device execution) * More consistent performance, since the devices can now share the inference burden (so that if one device is becoming too busy, another device can take more of the load) -Notice that with multi-device the application logic left unchanged, so you don't need to explicitly load the network to every device, -create and balance the inference requests and so on. From the application point of view, this is just another device that handles the actual machinery. +Notice that with multi-device the application logic is left unchanged, so you don't need to explicitly load the network to every device, create and balance the inference requests and so on. From the application point of view, this is just another device that handles the actual machinery. The only thing that is required to leverage performance is to provide the multi-device (and hence the underlying devices) with enough inference requests to crunch. -For example if you were processing 4 cameras on the CPU (with 4 inference requests), you may now want to process more cameras (with more requests in flight) -to keep CPU+GPU busy via multi-device. +For example, if you were processing 4 cameras on the CPU (with 4 inference requests), you may now want to process more cameras (with more requests in flight) to keep CPU+GPU busy via multi-device. -The "setup" of multi-device can be described in three major steps: +The "setup" of Multi-Device can be described in three major steps: * First is configuration of each device as usual (e.g. via conventional SetConfig method) * Second is loading of a network to the Multi-Device plugin created on top of (prioritized) list of the configured devices. This is the only change that you need in your application. * Finally, just like with any other ExecutableNetwork (resulted from LoadNetwork) you just create as many requests as needed to saturate the devices. These steps are covered below in details. -## Defining and Configuring the Multi-Device -Following the OpenVINO notions of "devices", the multi-device has a "MULTI" name. -The only configuration option for the multi-device is prioritized list of devices to use: +## Defining and Configuring the Multi-Device plugin +Following the OpenVINO notions of "devices", the Multi-Device has a "MULTI" name. 
+The only configuration option for the Multi-Device plugin is a prioritized list of devices to use: | Parameter name | Parameter values | Default | Description | | :--- | :--- | :--- | :----------------------------------------------------------------------------------------------------------------------------| | "MULTI_DEVICE_PRIORITIES" | comma-separated device names with no spaces| N/A | Prioritized list of devices | -You can use name of the configuration directly as a string, or use MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES from the multi/multi_device_config.hpp that defines the same string. +You can use the name of the configuration directly as a string, or use `MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES` from `multi/multi_device_config.hpp`, which defines the same string. Basically, there are three ways to specify the devices to be use by the "MULTI": @snippet snippets/MULTI0.cpp part0 -Notice that the priorities of the devices can be changed in real-time for the executable network: +Notice that the priorities of the devices can be changed in real time for the executable network: @snippet snippets/MULTI1.cpp part1 -Finally, there is a way to specify number of requests that the multi-device will internally keep for each device. -Say if your original app was running 4 cameras with 4 inference requests now you would probably want to share these 4 requests between 2 devices used in the MULTI. The easiest way is to specify a number of requests for each device using parentheses: "MULTI:CPU(2),GPU(2)" and use the same 4 requests in your app. However, such an explicit configuration is not performance portable and hence not recommended. Instead, the better way is to configure the individual devices and query the resulting number of requests to be used in the application level (see [Configuring the Individual Devices and Creating the Multi-Device On Top](#configuring-the-individual-devices-and-creating-the-multi-device-on-top)). +Finally, there is a way to specify the number of requests that the multi-device will internally keep for each device. Suppose your original app was running 4 cameras with 4 inference requests. You would probably want to share these 4 requests between 2 devices used in the MULTI. The easiest way is to specify a number of requests for each device using parentheses: "MULTI:CPU(2),GPU(2)" and use the same 4 requests in your app. However, such an explicit configuration is not performance-portable and hence not recommended. Instead, the better way is to configure the individual devices and query the resulting number of requests to be used at the application level (see [Configuring the Individual Devices and Creating the Multi-Device On Top](#configuring-the-individual-devices-and-creating-the-multi-device-on-top)). ## Enumerating Available Devices -Inference Engine now features a dedicated API to enumerate devices and their capabilities. See [Hello Query Device C++ Sample](../../../inference-engine/samples/hello_query_device/README.md). This is example output of the sample (truncated to the devices' names only): +Inference Engine now features a dedicated API to enumerate devices and their capabilities. See [Hello Query Device C++ Sample](../../../inference-engine/samples/hello_query_device/README.md). This is example output from the sample (truncated to the devices' names only): ```sh ./hello_query_device @@ -55,12 +51,12 @@ Available devices: ...
Device: HDDL ``` -Simple programmatic way to enumerate the devices and use with the multi-device is as follows: +A simple programmatic way to enumerate the devices and use with the multi-device is as follows: @snippet snippets/MULTI2.cpp part2 -Beyond trivial "CPU", "GPU", "HDDL" and so on, when multiple instances of a device are available the names are more qualified. -For example this is how two Intel® Movidius™ Myriad™ X sticks are listed with the hello_query_sample: +Beyond the trivial "CPU", "GPU", "HDDL" and so on, when multiple instances of a device are available the names are more qualified. +For example, this is how two Intel® Movidius™ Myriad™ X sticks are listed with the hello_query_sample: ``` ... Device: MYRIAD.1.2-ma2480 @@ -78,7 +74,7 @@ As discussed in the first section, you shall configure each individual device as @snippet snippets/MULTI4.cpp part4 -Alternatively, you can combine all the individual device settings into single config and load that, allowing the multi-device plugin to parse and apply that to the right devices. See code example in the next section. +Alternatively, you can combine all the individual device settings into single config and load that, allowing the Multi-Device plugin to parse and apply that to the right devices. See code example in the next section. Notice that while the performance of accelerators combines really well with multi-device, the CPU+GPU execution poses some performance caveats, as these devices share the power, bandwidth and other resources. For example it is recommended to enable the GPU throttling hint (which save another CPU thread for the CPU inference). See section of the [Using the multi-device with OpenVINO samples and benchmarking the performance](#using-the-multi-device-with-openvino-samples-and-benchmarking-the-performance) below. diff --git a/docs/IE_DG/supported_plugins/MYRIAD.md b/docs/IE_DG/supported_plugins/MYRIAD.md index df9c68da503..8983f20a925 100644 --- a/docs/IE_DG/supported_plugins/MYRIAD.md +++ b/docs/IE_DG/supported_plugins/MYRIAD.md @@ -2,7 +2,7 @@ ## Introducing MYRIAD Plugin -The Inference Engine MYRIAD plugin is developed for inference of neural networks on Intel® Neural Compute Stick 2. +The Inference Engine MYRIAD plugin has been developed for inference of neural networks on Intel® Neural Compute Stick 2. ## Installation on Linux* OS diff --git a/docs/IE_DG/supported_plugins/Supported_Devices.md b/docs/IE_DG/supported_plugins/Supported_Devices.md index 514b4bd58a7..ed8cabec076 100644 --- a/docs/IE_DG/supported_plugins/Supported_Devices.md +++ b/docs/IE_DG/supported_plugins/Supported_Devices.md @@ -21,7 +21,7 @@ Devices similar to the ones we have used for benchmarking can be accessed using ## Supported Configurations The Inference Engine can inference models in different formats with various input and output formats. -This chapter provides supported and optimal configurations for each plugin. +This page shows supported and optimal configurations for each plugin. ### Terminology @@ -36,17 +36,19 @@ This chapter provides supported and optimal configurations for each plugin. | U16 format | 2-byte unsigned integer format | | U8 format | 1-byte unsigned integer format | -NHWC, NCHW - Image data layout. Refers to the representation of batches of images. -NCDHW - Images sequence data layout. +NHWC, NCHW, and NCDHW refer to the representation of batches of images. +* NHWC and NCHW refer to image data layout. +* NCDHW refers to image sequence data layout. -* N - Number of images in a batch -* D - Depth. 
Depend on model it could be spatial or time dimension -* H - Number of pixels in the vertical dimension -* W - Number of pixels in the horizontal dimension -* C - Number of channels +Abbreviations in the support tables are as follows: +* N: Number of images in a batch +* D: Depth. Depend on model it could be spatial or time dimension +* H: Number of pixels in the vertical dimension +* W: Number of pixels in the horizontal dimension +* C: Number of channels CHW, NC, C - Tensor memory layout. -For example, the CHW value at index (c,h,w) is physically located at index (c\*H+h)\*W+w, for others by analogy +For example, the CHW value at index (c,h,w) is physically located at index (c\*H+h)\*W+w, for others by analogy. ### Supported Model Formats diff --git a/docs/IE_DG/supported_plugins/VPU.md b/docs/IE_DG/supported_plugins/VPU.md index 189a23b5a94..40cf1ad543b 100644 --- a/docs/IE_DG/supported_plugins/VPU.md +++ b/docs/IE_DG/supported_plugins/VPU.md @@ -45,7 +45,7 @@ Certain layers can be merged into Convolution, ReLU, and Eltwise layers accordin > **NOTE**: Application of these rules depends on tensor sizes and resources available. -Layers can be joined when the two conditions below are met: +Layers can be joined only when the two conditions below are met: - Layers are located on topologically independent branches. - Layers can be executed simultaneously on the same hardware units. diff --git a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md index d21ab41bd5e..2aed66ba719 100644 --- a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md +++ b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md @@ -68,7 +68,7 @@ Model Optimizer produces an Intermediate Representation (IR) of the network, whi > **NOTE:** > [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019). -## Table of Content +## Table of Contents * [Preparing and Optimizing your Trained Model with Model Optimizer](prepare_model/Prepare_Trained_Model.md) * [Configuring Model Optimizer](prepare_model/Config_Model_Optimizer.md) diff --git a/docs/MO_DG/IR_and_opsets.md b/docs/MO_DG/IR_and_opsets.md index e6a36b3009c..e7cb01f1fc5 100644 --- a/docs/MO_DG/IR_and_opsets.md +++ b/docs/MO_DG/IR_and_opsets.md @@ -36,12 +36,12 @@ OpenVINO™ toolkit introduces its own format of graph representation and its ow A graph is represented with two files: an XML file and a binary file. This representation is commonly referred to as the *Intermediate Representation* or *IR*. -XML file describes a network topology using `` tag for an operation node and `` tag is for a data-flow connection. +The XML file describes a network topology using a `` tag for an operation node and an `` tag for a data-flow connection. Each operation has a fixed number of attributes that define operation flavor used for a node. For example, `Convolution` operation has such attributes as `dilation`, `stride`, `pads_begin` and `pads_end`. -XML file doesn't have big constant values, like convolution weights. -Instead, it refers to a part of accompanying binary file that stores such values in a binary format. 
+The XML file doesn't have big constant values, like convolution weights. +Instead, it refers to a part of the accompanying binary file that stores such values in a binary format. Here is an example of a small IR XML file that corresponds to a graph from the previous section: diff --git a/docs/MO_DG/Known_Issues_Limitations.md b/docs/MO_DG/Known_Issues_Limitations.md index 075cbc6e7c3..ec8897d06c6 100644 --- a/docs/MO_DG/Known_Issues_Limitations.md +++ b/docs/MO_DG/Known_Issues_Limitations.md @@ -25,7 +25,7 @@ Possible workarounds: LD_PRELOAD= ``` This eliminates multiple loadings of libiomp, and makes all the components use this specific version of OpenMP. -* Alternatively, you can set KMP_DUPLICATE_LIB_OK=TRUE. However, performance degradation or results incorrectness may occur in this case. +* Alternatively, you can set KMP_DUPLICATE_LIB_OK=TRUE. However, performance degradation or incorrect results may occur in this case. ## Old proto compiler breaks protobuf library diff --git a/docs/MO_DG/prepare_model/Config_Model_Optimizer.md b/docs/MO_DG/prepare_model/Config_Model_Optimizer.md index b5b9853b35c..cced8949e54 100644 --- a/docs/MO_DG/prepare_model/Config_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/Config_Model_Optimizer.md @@ -156,7 +156,7 @@ pip3 install -r requirements_onnx.txt These procedures require: * Access to GitHub and the ability to use git commands -* Microsoft Visual Studio\* 2013 for Win64\* +* Microsoft Visual Studio\* 2013 for Win64\* (if using Windows\*) * C/C++ Model Optimizer uses the protobuf library to load trained Caffe models. diff --git a/docs/MO_DG/prepare_model/Model_Optimization_Techniques.md b/docs/MO_DG/prepare_model/Model_Optimization_Techniques.md index 60e61207f24..f2ae32a6924 100644 --- a/docs/MO_DG/prepare_model/Model_Optimization_Techniques.md +++ b/docs/MO_DG/prepare_model/Model_Optimization_Techniques.md @@ -6,7 +6,7 @@ Optimization offers methods to accelerate inference with the convolution neural ## Linear Operations Fusing -Many convolution neural networks includes `BatchNormalization` and `ScaleShift` layers (for example, Resnet\*, Inception\*) that can be presented as a sequence of linear operations: additions and multiplications. For example ScaleShift layer can be presented as Mul → Add sequence. These layers can be fused into previous `Convolution` or `FullyConnected` layers, except that case when Convolution comes after Add operation (due to Convolution paddings). +Many convolution neural networks includes `BatchNormalization` and `ScaleShift` layers (for example, Resnet\*, Inception\*) that can be presented as a sequence of linear operations: additions and multiplications. For example ScaleShift layer can be presented as Mul → Add sequence. These layers can be fused into previous `Convolution` or `FullyConnected` layers, except when Convolution comes after an Add operation (due to Convolution paddings). ### Usage @@ -16,11 +16,11 @@ In the Model Optimizer, this optimization is turned on by default. To disable it This optimization method consists of three stages: -1. `BatchNormalization` and `ScaleShift` decomposition: on this stage, `BatchNormalization` layer is decomposed to `Mul → Add → Mul → Add` sequence, and `ScaleShift` layer is decomposed to `Mul → Add` layers sequence. +1. `BatchNormalization` and `ScaleShift` decomposition: in this stage, `BatchNormalization` layer is decomposed to `Mul → Add → Mul → Add` sequence, and `ScaleShift` layer is decomposed to `Mul → Add` layers sequence. -2. 
**Linear operations merge**: on this stage we merge sequences of `Mul` and `Add` operations to the single `Mul → Add` instance. - For example, if we have `BatchNormalization → ScaleShift` sequence in our topology, it is replaced with `Mul → Add` (by the first stage). On the next stage, the latter will be replaced with `ScaleShift` layer in case if we have no available `Convolution` or `FullyConnected` layer to fuse into (next). -3. **Linear operations fusion**: on this stage, the tool fuses `Mul` and `Add` operations to `Convolution` or `FullyConnected` layers. Notice that it searches for `Convolution` and `FullyConnected` layers both backward and forward in the graph (except for `Add` operation that cannot be fused to `Convolution` layer in forward direction). +2. **Linear operations merge**: in this stage, the `Mul` and `Add` operations are merged into a single `Mul → Add` instance. + For example, if there is a `BatchNormalization → ScaleShift` sequence in the topology, it is replaced with `Mul → Add` in the first stage. In the next stage, the latter is replaced with a `ScaleShift` layer if there is no available `Convolution` or `FullyConnected` layer to fuse into next. +3. **Linear operations fusion**: in this stage, the tool fuses `Mul` and `Add` operations to `Convolution` or `FullyConnected` layers. Notice that it searches for `Convolution` and `FullyConnected` layers both backward and forward in the graph (except for `Add` operation that cannot be fused to `Convolution` layer in forward direction). ### Usage Examples @@ -36,11 +36,11 @@ ResNet optimization is a specific optimization that applies to Caffe ResNet topo ### Optimization Description -On the picture below, you can see the original and optimized parts of a Caffe ResNet50 model. The main idea of this optimization is to move the stride that is greater than 1 from Convolution layers with the kernel size = 1 to upper Convolution layers. In addition, the Model Optimizer adds a Pooling layer to align the input shape for a Eltwise layer, if it was changed during the optimization. +In the picture below, you can see the original and optimized parts of a Caffe ResNet50 model. The main idea of this optimization is to move the stride that is greater than 1 from Convolution layers with the kernel size = 1 to upper Convolution layers. In addition, the Model Optimizer adds a Pooling layer to align the input shape for a Eltwise layer, if it was changed during the optimization. ![ResNet50 blocks (original and optimized) from Netscope*](../img/optimizations/resnet_optimization.png) -In this example, the stride from the `res3a_branch1` and `res3a_branch2a` Convolution layers moves to the `res2c_branch2b` Convolution layer. Also to align the input shape for `res2c` Eltwise, the optimization inserts the Pooling layer with kernel size = 1 and stride = 2. +In this example, the stride from the `res3a_branch1` and `res3a_branch2a` Convolution layers moves to the `res2c_branch2b` Convolution layer. In addition, to align the input shape for `res2c` Eltwise, the optimization inserts the Pooling layer with kernel size = 1 and stride = 2. * * * diff --git a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md index f04d413bd1a..f9aef04a0a9 100644 --- a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md +++ b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md @@ -174,7 +174,7 @@ Model Optimizer tried to infer a specified layer via the Caffe\* framework, howe #### 13. 
What does the message "Cannot infer shapes due to exception in Caffe" mean? -Model Optimizer tried to infer a custom layer via the Caffe\* framework, however an error occurred, meaning that the model could not be inferred using the Caffe. It might happen if you try to convert the model with some noise weights and biases resulting in problems with layers with dynamic shapes. You should write your own extension for every custom layer you topology might have. For more details, refer to [Extending Model Optimizer with New Primitives](customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md). +Model Optimizer tried to infer a custom layer via the Caffe\* framework, but an error occurred, meaning that the model could not be inferred using Caffe. This might happen if you try to convert the model with some noise weights and biases that result in problems with layers that have dynamic shapes. You should write your own extension for every custom layer your topology might have. For more details, refer to [Model Optimizer Extensibility](customize_model_optimizer/Customize_Model_Optimizer.md). #### 14. What does the message "Cannot infer shape for node {} because there is no Caffe available. Please register python infer function for op or use Caffe for shape inference" mean? @@ -200,7 +200,7 @@ You might have specified negative values with `--mean_file_offsets`. Only positi `--scale` sets a scaling factor for all channels. `--scale_values` sets a scaling factor per each channel. Using both of them simultaneously produces ambiguity, so you must use only one of them. For more information, refer to the Using Framework-Agnostic Conversion Parameters: for Converting a Caffe* Model, Converting a TensorFlow* Model, Converting an MXNet* Model. -#### 20. What does the message "Cannot find prototxt file: for Caffe please specify --input_proto - a protobuf file that stores topology and --input_model that stores pretrained weights" mean? +#### 20. What does the message "Cannot find prototxt file: for Caffe please specify --input_proto - a protobuf file that stores topology and --input_model that stores pre-trained weights" mean? Model Optimizer cannot find a `.prototxt` file for a specified model. By default, it must be located in the same directory as the input model with the same name (except extension). If any of these conditions is not satisfied, use `--input_proto` to specify the path to the `.prototxt` file. @@ -258,7 +258,7 @@ This error occurs when the `SubgraphMatch._add_output_node` function is called m #### 35. What does the message "Unsupported match kind.... Match kinds "points" or "scope" are supported only" mean? -While using configuration file to implement a TensorFlow\* front replacement extension, an incorrect match kind was used. Only `points` or `scope` match kinds are supported. Please, refer to [Sub-Graph Replacement in the Model Optimizer](customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md) for more details. +While using a configuration file to implement a TensorFlow\* front replacement extension, an incorrect match kind was used. Only `points` or `scope` match kinds are supported. Please refer to [Model Optimizer Extensibility](customize_model_optimizer/Customize_Model_Optimizer.md) for more details. #### 36. What does the message "Cannot write an event file for the TensorBoard to directory" mean?
diff --git a/docs/MO_DG/prepare_model/Prepare_Trained_Model.md b/docs/MO_DG/prepare_model/Prepare_Trained_Model.md index f0dca5283f8..a74d1b789a2 100644 --- a/docs/MO_DG/prepare_model/Prepare_Trained_Model.md +++ b/docs/MO_DG/prepare_model/Prepare_Trained_Model.md @@ -25,7 +25,7 @@ However, if you use a topology with layers that are not recognized by the Model ## Model Optimizer Directory Structure -After installation with OpenVINO™ toolkit or Intel® Deep Learning Deployment Toolkit, the Model Optimizer folder has the following structure: +After installation with OpenVINO™ toolkit or Intel® Deep Learning Deployment Toolkit, the Model Optimizer folder has the following structure (some directories omitted for clarity): ``` |-- model_optimizer |-- extensions diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md index 06ae438d9cd..4c257d1689e 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md @@ -38,10 +38,10 @@ A summary of the steps for optimizing and deploying a model that was trained wit To convert a Caffe\* model: -1. Go to the `/deployment_tools/model_optimizer` directory. -2. Use the `mo.py` script to simply convert a model with the path to the input model `.caffemodel` file: +1. Go to the `$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer` directory. +2. Use the `mo.py` script to simply convert a model, specifying the path to the input model `.caffemodel` file and the path to an output directory with write permissions: ```sh -python3 mo.py --input_model .caffemodel +python3 mo.py --input_model .caffemodel --output_dir ``` Two groups of parameters are available to convert your model: @@ -91,15 +91,16 @@ Caffe*-specific parameters: #### Command-Line Interface (CLI) Examples Using Caffe\*-Specific Parameters -* Launching the Model Optimizer for the [bvlc_alexnet.caffemodel](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet) with a specified `prototxt` file. This is needed when the name of the Caffe\* model and the `.prototxt` file are different or are placed in different directories. Otherwise, it is enough to provide only the path to the input `model.caffemodel` file. +* Launching the Model Optimizer for the [bvlc_alexnet.caffemodel](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet) with a specified `prototxt` file. This is needed when the name of the Caffe\* model and the `.prototxt` file are different or are placed in different directories. Otherwise, it is enough to provide only the path to the input `model.caffemodel` file. You must have write permissions for the output directory. + ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --input_proto bvlc_alexnet.prototxt +python3 mo.py --input_model bvlc_alexnet.caffemodel --input_proto bvlc_alexnet.prototxt --output_dir ``` -* Launching the Model Optimizer for the [bvlc_alexnet.caffemodel](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet) with a specified `CustomLayersMapping` file. This is the legacy method of quickly enabling model conversion if your model has custom layers. This requires system Caffe\* on the computer. To read more about this, see [Legacy Mode for Caffe* Custom Layers](../customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md). 
+* Launching the Model Optimizer for the [bvlc_alexnet.caffemodel](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet) with a specified `CustomLayersMapping` file. This is the legacy method of quickly enabling model conversion if your model has custom layers. This requires the Caffe\* system on the computer. To read more about this, see [Legacy Mode for Caffe* Custom Layers](../customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md). Optional parameters without default values and not specified by the user in the `.prototxt` file are removed from the Intermediate Representation, and nested parameters are flattened: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel -k CustomLayersMapping.xml --disable_omitting_optional --enable_flattening_nested_params +python3 mo.py --input_model bvlc_alexnet.caffemodel -k CustomLayersMapping.xml --disable_omitting_optional --enable_flattening_nested_params --output_dir ``` This example shows a multi-input model with input layers: `data`, `rois` ``` @@ -121,9 +122,9 @@ layer { } ``` -* Launching the Model Optimizer for a multi-input model with two inputs and providing a new shape for each input in the order they are passed to the Model Optimizer. In particular, for data, set the shape to `1,3,227,227`. For rois, set the shape to `1,6,1,1`: +* Launching the Model Optimizer for a multi-input model with two inputs and providing a new shape for each input in the order they are passed to the Model Optimizer along with a writable output directory. In particular, for data, set the shape to `1,3,227,227`. For rois, set the shape to `1,6,1,1`: ```sh -python3 mo.py --input_model /path-to/your-model.caffemodel --input data,rois --input_shape (1,3,227,227),[1,6,1,1] +python3 mo.py --input_model /path-to/your-model.caffemodel --input data,rois --input_shape (1,3,227,227),[1,6,1,1] --output_dir ``` ## Custom Layer Definition diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md index 98bf2b78c3a..23fbad2ee08 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md @@ -34,9 +34,9 @@ A summary of the steps for optimizing and deploying a model that was trained wit To convert a Kaldi\* model: 1. Go to the `/deployment_tools/model_optimizer` directory. -2. Use the `mo.py` script to simply convert a model with the path to the input model `.nnet` or `.mdl` file: +2. 
Use the `mo.py` script to simply convert a model with the path to the input model `.nnet` or `.mdl` file and to an output directory where you have write permissions: ```sh -python3 mo.py --input_model .nnet +python3 mo.py --input_model .nnet --output_dir ``` Two groups of parameters are available to convert your model: @@ -58,14 +58,14 @@ Kaldi-specific parameters: ### Examples of CLI Commands -* To launch the Model Optimizer for the wsj_dnn5b_smbr model with the specified `.nnet` file: +* To launch the Model Optimizer for the wsj_dnn5b_smbr model with the specified `.nnet` file and an output directory where you have write permissions: ```sh -python3 mo.py --input_model wsj_dnn5b_smbr.nnet +python3 mo.py --input_model wsj_dnn5b_smbr.nnet --output_dir ``` -* To launch the Model Optimizer for the wsj_dnn5b_smbr model with existing file that contains counts for the last layer with biases: +* To launch the Model Optimizer for the wsj_dnn5b_smbr model with existing file that contains counts for the last layer with biases and a writable output directory: ```sh -python3 mo.py --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts +python3 mo.py --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts --output_dir _ ``` * The Model Optimizer normalizes сounts in the following way: \f[ @@ -81,7 +81,7 @@ python3 mo.py --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts * If you want to remove the last SoftMax layer in the topology, launch the Model Optimizer with the `--remove_output_softmax` flag. ```sh -python3 mo.py --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts --remove_output_softmax +python3 mo.py --input_model wsj_dnn5b_smbr.nnet --counts wsj_dnn5b_smbr.counts --remove_output_softmax --output_dir _ ``` The Model Optimizer finds the last layer of the topology and removes this layer only if it is a SoftMax layer. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md index 32b0c5fe95a..4b8c1816e8b 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md @@ -1,4 +1,4 @@ -# Converting a MXNet* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet} +# Converting an MXNet* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_MxNet} A summary of the steps for optimizing and deploying a model that was trained with the MXNet\* framework: @@ -46,9 +46,9 @@ A summary of the steps for optimizing and deploying a model that was trained wit To convert an MXNet\* model: 1. Go to the `/deployment_tools/model_optimizer` directory. -2. To convert an MXNet\* model contained in a `model-file-symbol.json` and `model-file-0000.params`, run the Model Optimizer launch script `mo.py`, specifying a path to the input model file: +2. 
To convert an MXNet\* model contained in a `model-file-symbol.json` and `model-file-0000.params`, run the Model Optimizer launch script `mo.py`, specifying a path to the input model file and a path to an output directory with write permissions: ```sh -python3 mo_mxnet.py --input_model model-file-0000.params +python3 mo_mxnet.py --input_model model-file-0000.params --output_dir ``` Two groups of parameters are available to convert your model: @@ -67,7 +67,7 @@ MXNet-specific parameters: --nd_prefix_name Prefix name for args.nd and argx.nd files --pretrained_model_name - Name of a pretrained MXNet model without extension and epoch + Name of a pre-trained MXNet model without extension and epoch number. This model will be merged with args.nd and argx.nd files --save_params_from_nd diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md index 561a7f84cf8..79f740b55ec 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md @@ -25,7 +25,7 @@ | GPT-2 | [model archive](https://github.com/onnx/models/blob/master/text/machine_comprehension/gpt-2/model/gpt2-10.tar.gz) | | YOLOv3 | [model archive](https://github.com/onnx/models/blob/master/vision/object_detection_segmentation/yolov3/model/yolov3-10.tar.gz) | -Listed models are built with the operation set version 8 except the GPT-2 model. Models that are upgraded to higher operation set versions may not be supported. +Listed models are built with the operation set version 8 except the GPT-2 model (which uses version 10). Models that are upgraded to higher operation set versions may not be supported. ## Supported PaddlePaddle* Models via ONNX Conversion Starting from the R5 release, the OpenVINO™ toolkit officially supports public PaddlePaddle* models via ONNX conversion. @@ -60,9 +60,9 @@ The Model Optimizer process assumes you have an ONNX model that was directly dow To convert an ONNX\* model: 1. Go to the `/deployment_tools/model_optimizer` directory. -2. Use the `mo.py` script to simply convert a model with the path to the input model `.nnet` file: +2. Use the `mo.py` script to simply convert a model with the path to the input model `.nnet` file and an output directory where you have write permissions: ```sh -python3 mo.py --input_model .onnx +python3 mo.py --input_model .onnx --output_dir ``` There are no ONNX\* specific parameters, so only [framework-agnostic parameters](Converting_Model_General.md) are available to convert your model. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md index 275b8e786d0..c4721cdead0 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md @@ -35,7 +35,7 @@ Detailed information on how to convert models from the TensorFlow 1 Detection Model Zoo is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models zoo that are supported. @@ -68,7 +68,7 @@ Detailed information on how to convert models from the TensorFlow 2 Detection Model Zoo is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. 
The table below contains models from the Object Detection Models zoo that are supported. @@ -154,13 +154,13 @@ Where `HEIGHT` and `WIDTH` are the input images height and width for which the m | YOLOv4 | [Repo](https://github.com/Ma-Dan/keras-yolo4) | | STN | [Repo](https://github.com/oarriaga/STN.keras) | -* YOLO topologies from DarkNet* can be converted using [instruction](tf_specific/Convert_YOLO_From_Tensorflow.md), -* FaceNet topologies can be converted using [instruction](tf_specific/Convert_FaceNet_From_Tensorflow.md). -* CRNN topologies can be converted using [instruction](tf_specific/Convert_CRNN_From_Tensorflow.md). -* NCF topologies can be converted using [instruction](tf_specific/Convert_NCF_From_Tensorflow.md) -* [GNMT](https://github.com/tensorflow/nmt) topology can be converted using [instruction](tf_specific/Convert_GNMT_From_Tensorflow.md) -* [BERT](https://github.com/google-research/bert) topology can be converted using [this instruction](tf_specific/Convert_BERT_From_Tensorflow.md). -* [XLNet](https://github.com/zihangdai/xlnet) topology can be converted using [this instruction](tf_specific/Convert_XLNet_From_Tensorflow.md). +* YOLO topologies from DarkNet* can be converted using [these instructions](tf_specific/Convert_YOLO_From_Tensorflow.md). +* FaceNet topologies can be converted using [these instructions](tf_specific/Convert_FaceNet_From_Tensorflow.md). +* CRNN topologies can be converted using [these instructions](tf_specific/Convert_CRNN_From_Tensorflow.md). +* NCF topologies can be converted using [these instructions](tf_specific/Convert_NCF_From_Tensorflow.md). +* [GNMT](https://github.com/tensorflow/nmt) topology can be converted using [these instructions](tf_specific/Convert_GNMT_From_Tensorflow.md). +* [BERT](https://github.com/google-research/bert) topology can be converted using [these instructions](tf_specific/Convert_BERT_From_Tensorflow.md). +* [XLNet](https://github.com/zihangdai/xlnet) topology can be converted using [these instructions](tf_specific/Convert_XLNet_From_Tensorflow.md). @@ -176,18 +176,18 @@ There are three ways to store non-frozen TensorFlow models and load them to the If you do not have an inference graph file, refer to [Freezing Custom Models in Python](#freeze-the-tensorflow-model). - To convert such TensorFlow model: + To convert such a TensorFlow model: 1. Go to the `/deployment_tools/model_optimizer` directory - 2. Run the `mo_tf.py` script with the path to the checkpoint file to convert a model: + 2. Run the `mo_tf.py` script with the path to the checkpoint file to convert a model and an output directory where you have write permissions: * If input model is in `.pb` format:
```sh -python3 mo_tf.py --input_model .pb --input_checkpoint +python3 mo_tf.py --input_model .pb --input_checkpoint --output_dir ``` * If input model is in `.pbtxt` format:
```sh -python3 mo_tf.py --input_model .pbtxt --input_checkpoint --input_model_is_text +python3 mo_tf.py --input_model .pbtxt --input_checkpoint --input_model_is_text --output_dir ``` 2. MetaGraph: @@ -201,9 +201,9 @@ python3 mo_tf.py --input_model .pbtxt --input_checkpoint /deployment_tools/model_optimizer` directory - 2. Run the `mo_tf.py` script with a path to the MetaGraph `.meta` file to convert a model:
+ 2. Run the `mo_tf.py` script with a path to the MetaGraph `.meta` file and a writable output directory to convert a model:
```sh -python3 mo_tf.py --input_meta_graph .meta +python3 mo_tf.py --input_meta_graph .meta --output_dir ``` 3. SavedModel format of TensorFlow 1.x and 2.x versions: @@ -213,9 +213,9 @@ python3 mo_tf.py --input_meta_graph .meta To convert such TensorFlow model: 1. Go to the `/deployment_tools/model_optimizer` directory - 2. Run the `mo_tf.py` script with a path to the SavedModel directory to convert a model:
+ 2. Run the `mo_tf.py` script with a path to the SavedModel directory and a writable output directory to convert a model:
```sh -python3 mo_tf.py --saved_model_dir +python3 mo_tf.py --saved_model_dir --output_dir ``` You can convert TensorFlow 1.x SavedModel format in the environment that has a 1.x or 2.x version of TensorFlow. However, TensorFlow 2.x SavedModel format strictly requires the 2.x version of TensorFlow. @@ -252,9 +252,9 @@ Where: To convert a TensorFlow model: 1. Go to the `/deployment_tools/model_optimizer` directory -2. Use the `mo_tf.py` script to simply convert a model with the path to the input model `.pb` file: +2. Use the `mo_tf.py` script to simply convert a model with the path to the input model `.pb` file and a writable output directory: ```sh -python3 mo_tf.py --input_model .pb +python3 mo_tf.py --input_model .pb --output_dir ``` Two groups of parameters are available to convert your model: @@ -306,29 +306,29 @@ TensorFlow*-specific parameters: #### Command-Line Interface (CLI) Examples Using TensorFlow\*-Specific Parameters -* Launching the Model Optimizer for Inception V1 frozen model when model file is a plain text protobuf: +* Launching the Model Optimizer for Inception V1 frozen model when model file is a plain text protobuf, specifying a writable output directory: ```sh -python3 mo_tf.py --input_model inception_v1.pbtxt --input_model_is_text -b 1 +python3 mo_tf.py --input_model inception_v1.pbtxt --input_model_is_text -b 1 --output_dir ``` -* Launching the Model Optimizer for Inception V1 frozen model and update custom sub-graph replacement file `transform.json` with information about input and output nodes of the matched sub-graph. For more information about this feature, refer to [Sub-Graph Replacement in the Model Optimizer](../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md). +* Launching the Model Optimizer for Inception V1 frozen model and update custom sub-graph replacement file `transform.json` with information about input and output nodes of the matched sub-graph, specifying a writable output directory. For more information about this feature, refer to [Sub-Graph Replacement in the Model Optimizer](../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md). ```sh -python3 mo_tf.py --input_model inception_v1.pb -b 1 --tensorflow_custom_operations_config_update transform.json +python3 mo_tf.py --input_model inception_v1.pb -b 1 --tensorflow_custom_operations_config_update transform.json --output_dir ``` * Launching the Model Optimizer for Inception V1 frozen model and use custom sub-graph replacement file `transform.json` for model conversion. For more information about this feature, refer to [Sub-Graph Replacement in the Model Optimizer](../customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md). ```sh -python3 mo_tf.py --input_model inception_v1.pb -b 1 --transformations_config transform.json +python3 mo_tf.py --input_model inception_v1.pb -b 1 --transformations_config transform.json --output_dir ``` * Launching the Model Optimizer for Inception V1 frozen model and dump information about the graph to TensorBoard log dir `/tmp/log_dir` ```sh -python3 mo_tf.py --input_model inception_v1.pb -b 1 --tensorboard_logdir /tmp/log_dir +python3 mo_tf.py --input_model inception_v1.pb -b 1 --tensorboard_logdir /tmp/log_dir --output_dir ``` * Launching the Model Optimizer for a model with custom TensorFlow operations (refer to the [TensorFlow* documentation](https://www.tensorflow.org/extend/adding_an_op)) implemented in C++ and compiled into the shared library `my_custom_op.so`. 
Model Optimizer falls back to TensorFlow to infer output shape of operations implemented in the library if a custom TensorFlow operation library is provided. If it is not provided, a custom operation with an inference function is needed. For more information about custom operations, refer to the [Extending the Model Optimizer with New Primitives](../customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md). ```sh -python3 mo_tf.py --input_model custom_model.pb --tensorflow_custom_layer_libraries ./my_custom_op.so +python3 mo_tf.py --input_model custom_model.pb --tensorflow_custom_layer_libraries ./my_custom_op.so --output_dir ``` @@ -343,9 +343,9 @@ Below are the instructions on how to convert each of them. A model in the SavedModel format consists of a directory with a `saved_model.pb` file and two subfolders: `variables` and `assets`. To convert such a model: 1. Go to the `/deployment_tools/model_optimizer` directory. -2. Run the `mo_tf.py` script with a path to the SavedModel directory: +2. Run the `mo_tf.py` script with a path to the SavedModel directory and a writable output directory: ```sh -python3 mo_tf.py --saved_model_dir +python3 mo_tf.py --saved_model_dir --output_dir ``` TensorFlow* 2 SavedModel format strictly requires the 2.x version of TensorFlow installed in the diff --git a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md index 2df7773b8ad..ed6451a7632 100644 --- a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md @@ -1,9 +1,9 @@ # Converting a Model to Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model} Use the mo.py script from the `/deployment_tools/model_optimizer` directory to run the Model Optimizer and convert the model to the Intermediate Representation (IR). -The simplest way to convert a model is to run mo.py with a path to the input model file: +The simplest way to convert a model is to run mo.py with a path to the input model file and an output directory where you have write permissions: ```sh -python3 mo.py --input_model INPUT_MODEL +python3 mo.py --input_model INPUT_MODEL --output_dir ``` > **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md). 
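As a usage note that goes beyond the original page, the IR produced by mo.py is typically consumed by the Inference Engine afterwards. The following is a rough C++ sketch under the assumption that the conversion produced `model.xml` and `model.bin` in the output directory; the "CPU" device name is only an example.

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    // File names are assumed; use the IR generated in your output directory
    auto network = core.ReadNetwork("model.xml", "model.bin");
    // Compile the network for a device of your choice ("CPU" here as an example)
    auto executable = core.LoadNetwork(network, "CPU");
    // Create an inference request; filling input blobs is omitted for brevity
    auto request = executable.CreateInferRequest();
    request.Infer();
    return 0;
}
```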
diff --git a/docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md b/docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md index 82bcd133bb8..2d267cda3e7 100644 --- a/docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md +++ b/docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md @@ -1,11 +1,12 @@ # Converting a Model Using General Conversion Parameters {#openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General} -To simply convert a model trained by any supported framework, run the Model Optimizer launch script ``mo.py`` with -specifying a path to the input model file: +To simply convert a model trained by any supported framework, run the Model Optimizer launch script ``mo.py`` specifying a path to the input model file and an output directory where you have write permissions: ```sh -python3 mo.py --input_model INPUT_MODEL +python3 mo.py --input_model INPUT_MODEL --output_dir ``` +The script is in `$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/`. The output directory must have write permissions, so you can run mo.py from the output directory or specify an output path with the `--output_dir` option. + > **NOTE:** The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For details, refer to [When to Reverse Input Channels](#when_to_reverse_input_channels). To adjust the conversion process, you can also use the general (framework-agnostic) parameters: @@ -157,7 +158,7 @@ If both mean and scale values are specified, the mean is subtracted first and th There is no a universal recipe for determining the mean/scale values for a particular model. The steps below could help to determine them: * Read the model documentation. Usually the documentation describes mean/scale value if the pre-processing is required. * Open the example script/application executing the model and track how the input data is read and passed to the framework. -* Open the model in a visualization tool and check for layers performing subtraction or multiplication (like `Sub`, `Mul`, `ScaleShift`, `Eltwise` etc) of the input data. If such layers exist, the pre-processing is most probably the part of the model. +* Open the model in a visualization tool and check for layers performing subtraction or multiplication (like `Sub`, `Mul`, `ScaleShift`, `Eltwise` etc) of the input data. If such layers exist, pre-processing is probably part of the model. ## When to Specify Input Shapes There are situations when the input data shape for the model is not fixed, like for the fully-convolutional neural networks. In this case, for example, TensorFlow\* models contain `-1` values in the `shape` attribute of the `Placeholder` operation. Inference Engine does not support input layers with undefined size, so if the input shapes are not defined in the model, the Model Optimizer fails to convert the model. The solution is to provide the input shape(s) using the `--input` or `--input_shape` command line parameter for all input(s) of the model or provide the batch size using the `-b` command line parameter if the model contains just one input with undefined batch size only. In the latter case, the `Placeholder` shape for the TensorFlow\* model looks like this `[-1, 224, 224, 3]`. 
@@ -173,55 +174,55 @@ Resulting Intermediate Representation will not be resizable with the help of Inf Launch the Model Optimizer for the Caffe bvlc_alexnet model with debug log level: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --log_level DEBUG +python3 mo.py --input_model bvlc_alexnet.caffemodel --log_level DEBUG --output_dir ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with the output IR called `result.*` in the specified `output_dir`: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --model_name result --output_dir /../../models/ +python3 mo.py --input_model bvlc_alexnet.caffemodel --model_name result --output_dir /../../models/ ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with one input with scale values: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --scale_values [59,59,59] +python3 mo.py --input_model bvlc_alexnet.caffemodel --scale_values [59,59,59] --output_dir ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with multiple inputs with scale values: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --input data,rois --scale_values [59,59,59],[5,5,5] +python3 mo.py --input_model bvlc_alexnet.caffemodel --input data,rois --scale_values [59,59,59],[5,5,5] --output_dir ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with multiple inputs with scale and mean values specified for the particular nodes: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --input data,rois --mean_values data[59,59,59] --scale_values rois[5,5,5] +python3 mo.py --input_model bvlc_alexnet.caffemodel --input data,rois --mean_values data[59,59,59] --scale_values rois[5,5,5] --output_dir ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with specified input layer, overridden input shape, scale 5, batch 8 and specified name of an output operation: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --input "data[1 3 224 224]" --output pool5 -s 5 -b 8 +python3 mo.py --input_model bvlc_alexnet.caffemodel --input "data[1 3 224 224]" --output pool5 -s 5 -b 8 --output_dir ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with disabled fusing for linear operations to Convolution and grouped convolutions: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --disable_fusing --disable_gfusing +python3 mo.py --input_model bvlc_alexnet.caffemodel --disable_fusing --disable_gfusing --output_dir ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with reversed input channels order between RGB and BGR, specified mean values to be used for the input image per channel and specified data type for input tensor values: ```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --reverse_input_channels --mean_values [255,255,255] --data_type FP16 +python3 mo.py --input_model bvlc_alexnet.caffemodel --reverse_input_channels --mean_values [255,255,255] --data_type FP16 --output_dir ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with extensions listed in specified directories, specified mean_images binaryproto. file For more information about extensions, please refer to [this](../customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md) page. 
```sh -python3 mo.py --input_model bvlc_alexnet.caffemodel --extensions /home/,/some/other/path/ --mean_file /path/to/binaryproto +python3 mo.py --input_model bvlc_alexnet.caffemodel --extensions /home/,/some/other/path/ --mean_file /path/to/binaryproto --output_dir ``` Launch the Model Optimizer for TensorFlow* FaceNet* model with a placeholder freezing value. It replaces the placeholder with a constant layer that contains the passed value. For more information about FaceNet conversion, please refer to [this](tf_specific/Convert_FaceNet_From_Tensorflow.md) page ```sh -python3 mo.py --input_model FaceNet.pb --input "phase_train->False" +python3 mo.py --input_model FaceNet.pb --input "phase_train->False" --output_dir ``` Launch the Model Optimizer for any model with a placeholder freezing tensor of values. @@ -231,5 +232,5 @@ Tensor here is represented in square brackets with each value separated from ano If data type is set in the model, this tensor will be reshaped to a placeholder shape and casted to placeholder data type. Otherwise, it will be casted to data type passed to `--data_type` parameter (by default, it is FP32). ```sh -python3 mo.py --input_model FaceNet.pb --input "placeholder_layer_name->[0.1 1.2 2.3]" +python3 mo.py --input_model FaceNet.pb --input "placeholder_layer_name->[0.1 1.2 2.3]" --output_dir ``` diff --git a/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md b/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md index a4bb4e98017..d86368a9f70 100644 --- a/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md @@ -37,10 +37,13 @@ In the TensorBoard, it looks the following way together with some predecessors: ![TensorBoard with predecessors](../../img/inception_v1_std_output.png) -Convert this model: +Convert this model and put the results in a writable output directory: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 +${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer +python3 mo.py --input_model inception_v1.pb -b 1 --output_dir ``` +(The other examples on this page assume that you first cd to the `model_optimizer` directory and add the `--output_dir` argument with a directory where you have write permissions.) + The output `.xml` file with an Intermediate Representation contains the `Input` layer among other layers in the model: ```xml @@ -78,9 +81,9 @@ The last layer in the model is `InceptionV1/Logits/Predictions/Reshape_1`, which ``` Due to automatic identification of inputs and outputs, you do not need to provide the `--input` and `--output` options to convert the whole model. The following commands are equivalent for the Inception V1 model: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 +python3 mo.py --input_model inception_v1.pb -b 1 --output_dir -python3 mo.py --input_model=inception_v1.pb -b 1 --input=input --output=InceptionV1/Logits/Predictions/Reshape_1 +python3 mo.py --input_model inception_v1.pb -b 1 --input input --output InceptionV1/Logits/Predictions/Reshape_1 --output_dir ``` The Intermediate Representations are identical for both conversions. The same is true if the model has multiple inputs and/or outputs. @@ -96,7 +99,7 @@ If you want to cut your model at the end, you have the following options: 1. 
The following command cuts off the rest of the model after the `InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu`, making this node the last in the model: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 --output=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu +python3 mo.py --input_model inception_v1.pb -b 1 --output=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir ``` The resulting Intermediate Representation has three layers: ```xml @@ -140,7 +143,7 @@ python3 mo.py --input_model=inception_v1.pb -b 1 --output=InceptionV1/InceptionV 2. The following command cuts the edge that comes from 0 output port of the `InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu` and the rest of the model, making this node the last one in the model: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 --output=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu:0 +python3 mo.py --input_model inception_v1.pb -b 1 --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu:0 --output_dir ``` The resulting Intermediate Representation has three layers, which are the same as in the previous case: ```xml @@ -184,7 +187,7 @@ python3 mo.py --input_model=inception_v1.pb -b 1 --output=InceptionV1/InceptionV 3. The following command cuts the edge that comes to 0 input port of the `InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu` and the rest of the model including `InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu`, deleting this node and making the previous node `InceptionV1/InceptionV1/Conv2d_1a_7x7/Conv2D` the last in the model: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 --output=0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu +python3 mo.py --input_model inception_v1.pb -b 1 --output=0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir ``` The resulting Intermediate Representation has two layers, which are the same as the first two layers in the previous case: ```xml @@ -222,7 +225,7 @@ If you want to go further and cut the beginning of the model, leaving only the ` 1. You can use the following command line, where `--input` and `--output` specify the same node in the graph: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 --output=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --input=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu +python3 mo.py --input_model=inception_v1.pb -b 1 --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --input InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir ``` The resulting Intermediate Representation looks as follows: ```xml @@ -254,7 +257,7 @@ Even though `--input_shape` is not specified in the command line, the shapes for 2. You can cut edge incoming to layer by port number. To specify incoming port use notation `--input=port:input_node`. So, to cut everything before `ReLU` layer, cut edge incoming in port 0 of `InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu` node: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 --input=0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu +python3 mo.py --input_model inception_v1.pb -b 1 --input 0:InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir ``` The resulting Intermediate Representation looks as follows: ```xml @@ -286,7 +289,7 @@ Even though `--input_shape` is not specified in the command line, the shapes for 3. You can cut edge outcoming from layer by port number. To specify outcoming port use notation `--input=input_node:port`. 
So, to cut everything before `ReLU` layer, cut edge from `InceptionV1/InceptionV1/Conv2d_1a_7x7/BatchNorm/batchnorm/add_1` node to `ReLU`: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 --input=InceptionV1/InceptionV1/Conv2d_1a_7x7/BatchNorm/batchnorm/add_1:0 --output=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu +python3 mo.py --input_model inception_v1.pb -b 1 --input InceptionV1/InceptionV1/Conv2d_1a_7x7/BatchNorm/batchnorm/add_1:0 --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir ``` The resulting Intermediate Representation looks as follows: ```xml @@ -317,7 +320,7 @@ python3 mo.py --input_model=inception_v1.pb -b 1 --input=InceptionV1/InceptionV1 The input shape can be overridden with `--input_shape`. In this case, the shape is applied to the node referenced in `--input`, not to the original `Placeholder` in the model. For example, this command line ```sh -python3 mo.py --input_model=inception_v1.pb --input_shape=[1,5,10,20] --output=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --input=InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu +python3 mo.py --input_model inception_v1.pb --input_shape=[1,5,10,20] --output InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --input InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu --output_dir ``` gives the following shapes in the `Input` and `ReLU` layers: @@ -366,14 +369,14 @@ There are operations that contain more than one input ports. In the example cons Following this behavior, the Model Optimizer creates an `Input` layer for port 0 only, leaving port 1 as a constant. So the result of: ```sh -python3 mo.py --input_model=inception_v1.pb -b 1 --input=InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution +python3 mo.py --input_model inception_v1.pb -b 1 --input InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --output_dir ``` is identical to the result of conversion of the model as a whole, because this convolution is the first executable operation in Inception V1. Different behavior occurs when `--input_shape` is also used as an attempt to override the input shape: ```sh -python3 mo.py --input_model=inception_v1.pb--input=InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --input_shape=[1,224,224,3] +python3 mo.py --input_model inception_v1.pb--input=InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --input_shape [1,224,224,3] --output_dir ``` An error occurs (for more information, see FAQ #30): ```sh @@ -385,5 +388,5 @@ In this case, when `--input_shape` is specified and the node contains multiple i The correct command line is: ```sh -python3 mo.py --input_model=inception_v1.pb --input=0:InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --input_shape=[1,224,224,3] +python3 mo.py --input_model inception_v1.pb --input 0:InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --input_shape=[1,224,224,3] --output_dir ``` diff --git a/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md b/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md index 50b0020ee2f..eda5d768c47 100644 --- a/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md +++ b/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md @@ -5,7 +5,7 @@ Inference Engine CPU plugin can infer models in the 8-bit integer (INT8) precision. For details, refer to [INT8 inference on the CPU](../../../IE_DG/Int8Inference.md). -Intermediate Representation (IR) should be specifically formed to be suitable for the INT8 inference. +Intermediate Representation (IR) should be specifically formed to be suitable for INT8 inference. 
Such an IR is called an INT8 IR and you can generate it in two ways: - [Quantize model with the Post-Training Optimization tool](@ref pot_README) - Use the Model Optimizer for TensorFlow\* pre-TFLite models (`.pb` model file with `FakeQuantize*` operations) @@ -18,11 +18,11 @@ To execute the `Convolution` operation in INT8 on CPU, both data and weight inpu ![](../../img/expanded_int8_Convolution_weights.png) INT8 IR is also suitable for FP32 and FP16 inference if a chosen plugin supports all operations of the IR, because the only difference between an INT8 IR and FP16 or FP32 IR is the existence of `FakeQuantize` in the INT8 IR. -Plugins with the INT8 inference support recognize these sub-graphs and quantize them during the inference time. -Plugins without the INT8 support execute all operations, including `FakeQuantize`, as is in the FP32 or FP16 precision. +Plugins with INT8 inference support recognize these sub-graphs and quantize them during the inference time. +Plugins without INT8 support execute all operations, including `FakeQuantize`, as is in the FP32 or FP16 precision. Accordingly, the presence of FakeQuantize operations in the IR is a recommendation for a plugin on how to quantize particular operations in the model. -If capable, a plugin accepts the recommendation and performs the INT8 inference, otherwise the plugin ignores the recommendation and executes a model in the floating-point precision. +If capable, a plugin accepts the recommendation and performs INT8 inference, otherwise the plugin ignores the recommendation and executes a model in the floating-point precision. ## Compressed INT8 Weights diff --git a/docs/MO_DG/prepare_model/convert_model/kaldi_specific/Aspire_Tdnn_Model.md b/docs/MO_DG/prepare_model/convert_model/kaldi_specific/Aspire_Tdnn_Model.md index b4e0ea06651..f6709865b5c 100644 --- a/docs/MO_DG/prepare_model/convert_model/kaldi_specific/Aspire_Tdnn_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/kaldi_specific/Aspire_Tdnn_Model.md @@ -1,7 +1,7 @@ # Convert Kaldi* ASpIRE Chain Time Delay Neural Network (TDNN) Model to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_kaldi_specific_Aspire_Tdnn_Model} You can [download a pre-trained model](https://kaldi-asr.org/models/1/0001_aspire_chain_model.tar.gz) -for the ASpIRE Chain Time Delay Neural Network (TDNN) from the Kaldi* project official web-site. +for the ASpIRE Chain Time Delay Neural Network (TDNN) from the Kaldi* project official website. ## Convert ASpIRE Chain TDNN Model to IR diff --git a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_GluonCV_Models.md b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_GluonCV_Models.md index ae65c1b2261..45d3db18423 100644 --- a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_GluonCV_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_GluonCV_Models.md @@ -2,7 +2,7 @@ This document provides the instructions and examples on how to use Model Optimizer to convert [GluonCV SSD and YOLO-v3 models](https://gluon-cv.mxnet.io/model_zoo/detection.html) to IR. -1. Choose the topology available from the [GluonCV Moodel Zoo](https://gluon-cv.mxnet.io/model_zoo/detection.html) and export to the MXNet format using the GluonCV API. For example, for the `ssd_512_mobilenet1.0` topology: +1. Choose the topology available from the [GluonCV Model Zoo](https://gluon-cv.mxnet.io/model_zoo/detection.html) and export to the MXNet format using the GluonCV API. 
For example, for the `ssd_512_mobilenet1.0` topology: ```python from gluoncv import model_zoo, data, utils from gluoncv.utils import export_block @@ -13,14 +13,14 @@ As a result, you will get an MXNet model representation in `ssd_512_mobilenet1.0 2. Run the Model Optimizer tool specifying the `--enable_ssd_gluoncv` option. Make sure the `--input_shape` parameter is set to the input shape layout of your model (NHWC or NCHW). The examples below illustrates running the Model Optimizer for the SSD and YOLO-v3 models trained with the NHWC layout and located in the ``: * **For GluonCV SSD topologies:** ```sh -python3 mo_mxnet.py --input_model /ssd_512_mobilenet1.0.params --enable_ssd_gluoncv --input_shape [1,512,512,3] --input data +python3 mo_mxnet.py --input_model /ssd_512_mobilenet1.0.params --enable_ssd_gluoncv --input_shape [1,512,512,3] --input data --output_dir ``` * **For YOLO-v3 topology:** * To convert the model: ```sh - python3 mo_mxnet.py --input_model /yolo3_mobilenet1.0_voc-0000.params --input_shape [1,255,255,3] + python3 mo_mxnet.py --input_model /yolo3_mobilenet1.0_voc-0000.params --input_shape [1,255,255,3] --output_dir ``` * To convert the model with replacing the subgraph with RegionYolo layers: ```sh - python3 mo_mxnet.py --input_model /models/yolo3_mobilenet1.0_voc-0000.params --input_shape [1,255,255,3] --transformations_config "mo/extensions/front/mxnet/yolo_v3_mobilenet1_voc.json" + python3 mo_mxnet.py --input_model /models/yolo3_mobilenet1.0_voc-0000.params --input_shape [1,255,255,3] --transformations_config "mo/extensions/front/mxnet/yolo_v3_mobilenet1_voc.json" --output_dir ``` diff --git a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md index f6a9f189750..f0ec23d5a9f 100644 --- a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md +++ b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md @@ -11,6 +11,8 @@ To use the style transfer sample from OpenVINO™, follow the steps below as sudo apt-get install python-tk ``` +Installing python-tk step is needed only for Linux, as it is included by default in Python\* for Windows\*. + 2. Install Python\* requirements: ```sh pip3 install --user mxnet @@ -64,32 +66,31 @@ arg_dict.update(args) 6. Use `arg_dict` instead of `args` as a parameter of the `decoder.bind()` function. Replace the line:
```py -self.deco_executor = decoder.bind(ctx=mx.cpu(), args=args, aux_states=auxs) +self.deco_executor = decoder.bind(ctx=mx.gpu(), args=args, aux_states=auxs) ``` with the following:
```py self.deco_executor = decoder.bind(ctx=mx.cpu(), args=arg_dict, aux_states=auxs) ``` -7. Replace all `mx.gpu` with `mx.cpu` in the `decoder.bind()` function. -8. To save the result model as a `.json` file, add the following code to the end of the `generate()` function in the `Maker` class:
+7. To save the resulting model as a `.json` file, add the following code to the end of the `generate()` function in the `Maker` class:
```py self.vgg_executor._symbol.save('{}-symbol.json'.format('vgg19')) self.deco_executor._symbol.save('{}-symbol.json'.format('nst_vgg19')) ``` -9. Save and close the `make_image.py` file. +8. Save and close the `make_image.py` file. -#### 5. Run the sample with a decoder model according to the instructions from the `README.md` file in the cloned repository. +#### 5. Run the sample with a decoder model according to the instructions from the `README.md` file in the `fast_mrf_cnn` directory of the cloned repository. For example, to run the sample with the pre-trained decoder weights from the `models` folder and output shape, use the following code:
```py import make_image maker = make_image.Maker('models/13', (1024, 768)) maker.generate('output.jpg', '../images/tubingen.jpg') ``` -Where `'models/13'` string is composed of the following sub-strings: -* `'models/'` - path to the folder that contains .nd files with pre-trained styles weights and `'13'` -* Decoder prefix: the repository contains a default decoder, which is the 13_decoder. +Where the `models/13` string is composed of the following substrings: +* `models/`: path to the folder that contains .nd files with pre-trained styles weights +* `13`: prefix pointing to 13_decoder, which is the default decoder for the repository -You can choose any style from [collection of pre-trained weights](https://pan.baidu.com/s/1skMHqYp). The `generate()` function generates `nst_vgg19-symbol.json` and `vgg19-symbol.json` files for the specified shape. In the code, it is [1024 x 768] for a 4:3 ratio, and you can specify another, for example, [224,224] for a square ratio. +You can choose any style from [collection of pre-trained weights](https://pan.baidu.com/s/1skMHqYp). (On the Chinese-language page, click the down arrow next to a size in megabytes. Then wait for an overlay box to appear, and click the blue button in it to download.) The `generate()` function generates `nst_vgg19-symbol.json` and `vgg19-symbol.json` files for the specified shape. In the code, it is [1024 x 768] for a 4:3 ratio, and you can specify another, for example, [224,224] for a square ratio. #### 6. Run the Model Optimizer to generate an Intermediate Representation (IR): diff --git a/docs/MO_DG/prepare_model/convert_model/onnx_specific/Convert_DLRM.md b/docs/MO_DG/prepare_model/convert_model/onnx_specific/Convert_DLRM.md index 2d12ee0a1e7..341ad42c955 100644 --- a/docs/MO_DG/prepare_model/convert_model/onnx_specific/Convert_DLRM.md +++ b/docs/MO_DG/prepare_model/convert_model/onnx_specific/Convert_DLRM.md @@ -4,7 +4,7 @@ These instructions are applicable only to the DLRM converted to the ONNX* file format from the [facebookresearch/dlrm model](https://github.com/facebookresearch/dlrm). -**Step 1**. Save trained Pytorch* model to ONNX* format. If you training model using [script provided in model repository](https://github.com/facebookresearch/dlrm/blob/master/dlrm_s_pytorch.py) just add `--save-onnx` flag to the command line parameters and you'll get `dlrm_s_pytorch.onnx` file containing model serialized in ONNX* format. +**Step 1**. Save trained Pytorch* model to ONNX* format. If you train the model using the [script provided in model repository](https://github.com/facebookresearch/dlrm/blob/master/dlrm_s_pytorch.py), just add the `--save-onnx` flag to the command line parameters and you'll get the `dlrm_s_pytorch.onnx` file containing the model serialized in ONNX* format. **Step 2**. 
To generate the Intermediate Representation (IR) of the model, change your current working directory to the Model Optimizer installation directory and run the Model Optimizer with the following parameters: ```sh diff --git a/docs/MO_DG/prepare_model/convert_model/onnx_specific/Convert_GPT2.md b/docs/MO_DG/prepare_model/convert_model/onnx_specific/Convert_GPT2.md index c2117ee5168..cd9c49f46f4 100644 --- a/docs/MO_DG/prepare_model/convert_model/onnx_specific/Convert_GPT2.md +++ b/docs/MO_DG/prepare_model/convert_model/onnx_specific/Convert_GPT2.md @@ -13,5 +13,5 @@ To download the model and sample test data, click **Download** on [https://githu To generate the Intermediate Representation (IR) of the model GPT-2, run the Model Optimizer with the following parameters: ```sh -python3 mo.py --input_model gpt2-10.onnx --input_shape [X,Y,Z] +python3 mo.py --input_model gpt2-10.onnx --input_shape [X,Y,Z] --output_dir ``` diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md index a3f244fa55f..ffb16eb5f7c 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_F3Net.md @@ -4,7 +4,7 @@ ## Download and Convert the Model to ONNX* -To download the pretrained model or train the model yourself, refer to the +To download the pre-trained model or train the model yourself, refer to the [instruction](https://github.com/weijun88/F3Net/blob/master/README.md) in the F3Net model repository. Firstly, convert the model to ONNX\* format. Create and run the script with the following content in the `src` directory of the model repository: diff --git a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md index ce0e582875c..ed072ac64f4 100644 --- a/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md +++ b/docs/MO_DG/prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md @@ -136,7 +136,7 @@ git clone https://github.com/dbolya/yolact git checkout 57b8f2d95e62e2e649b382f516ab41f949b57239 ``` -**Step 2**. Download a pretrained model, for example `yolact_base_54_800000.pth`. +**Step 2**. Download a pre-trained model, for example `yolact_base_54_800000.pth`. **Step 3**. Export the model to ONNX* format. diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_CRNN_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_CRNN_From_Tensorflow.md index 906ca8c1e4d..efb930a4e1e 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_CRNN_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_CRNN_From_Tensorflow.md @@ -9,7 +9,7 @@ have another implementation of CRNN model, you can convert it to IR in similar w **To convert this model to the IR:** **Step 1.** Clone this GitHub repository and checkout the commit: - 1. Clone reposirory: + 1. Clone repository: ```sh git clone https://github.com/MaybeShewill-CV/CRNN_Tensorflow.git ``` @@ -18,7 +18,7 @@ have another implementation of CRNN model, you can convert it to IR in similar w git checkout 64f1f1867bffaacfeacc7a80eebf5834a5726122 ``` -**Step 2.** Train the model using framework or use the pretrained checkpoint provided in this repository. +**Step 2.** Train the model using framework or use the pre-trained checkpoint provided in this repository. 
**Step 3.** Create an inference graph: 1. Go to the `CRNN_Tensorflow` directory with the cloned repository: @@ -31,7 +31,7 @@ cd path/to/CRNN_Tensorflow export PYTHONPATH="${PYTHONPATH}:/path/to/CRNN_Tensorflow/" ``` * For Windows\* OS add `/path/to/CRNN_Tensorflow/` to the `PYTHONPATH` environment variable in settings. - 3. Open the `tools/demo_shadownet.py` script. After `saver.restore(sess=sess, save_path=weights_path)` line, add the following code: + 3. Open the `tools/test_shadownet.py` script. After `saver.restore(sess=sess, save_path=weights_path)` line, add the following code: ```python from tensorflow.python.framework import graph_io frozen = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['shadow/LSTMLayers/transpose_time_major']) @@ -39,7 +39,7 @@ graph_io.write_graph(frozen, '.', 'frozen_graph.pb', as_text=False) ``` 4. Run the demo with the following command: ```sh -python tools/demo_shadownet.py --image_path data/test_images/test_01.jpg --weights_path model/shadownet/shadownet_2017-10-17-11-47-46.ckpt-199999 +python tools/test_shadownet.py --image_path data/test_images/test_01.jpg --weights_path model/shadownet/shadownet_2017-10-17-11-47-46.ckpt-199999 ``` If you want to use your checkpoint, replace the path in the `--weights_path` parameter with a path to your checkpoint. 5. In the `CRNN_Tensorflow` directory, you will find the inference CRNN graph `frozen_graph.pb`. You can use this graph with the OpenVINO™ toolkit diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_DeepSpeech_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_DeepSpeech_From_Tensorflow.md index 4b8bd1e4048..74833cf3ad3 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_DeepSpeech_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_DeepSpeech_From_Tensorflow.md @@ -34,20 +34,15 @@ Pre-trained frozen model file is `output_graph.pb`. As you can see, the frozen model still has two variables: `previous_state_c` and `previous_state_h`. It means that the model keeps training those variables at each inference. -At the first inference of this graph, the variables are initialized by zero tensors. After executing the -`lstm_fused_cell` nodes, cell state and hidden state, which are the results of the `BlockLSTM` execution, -are assigned to these two variables. +At the first inference of this graph, the variables are initialized by zero tensors. After executing the `lstm_fused_cell` nodes, cell state and hidden state, which are the results of the `BlockLSTM` execution, are assigned to these two variables. -With each inference of the DeepSpeech graph, initial cell state and hidden state data for `BlockLSTM` is taken -from previous inference from variables. Outputs (cell state and hidden state) of `BlockLSTM` are reassigned -to the same variables. +With each inference of the DeepSpeech graph, initial cell state and hidden state data for `BlockLSTM` is taken from previous inference from variables. Outputs (cell state and hidden state) of `BlockLSTM` are reassigned to the same variables. It helps the model to remember the context of the words that it takes as input. ## Convert the TensorFlow* DeepSpeech Model to IR -The Model Optimizer assumes that the output model is for inference only. That is why you should cut those variables off and -resolve keeping cell and hidden states on the application level. +The Model Optimizer assumes that the output model is for inference only. 
That is why you should cut those variables off and resolve keeping cell and hidden states on the application level. There are certain limitations for the model conversion: - Time length (`time_len`) and sequence length (`seq_len`) are equal. @@ -55,11 +50,11 @@ There are certain limitations for the model conversion: To generate the DeepSpeech Intermediate Representation (IR), provide the TensorFlow DeepSpeech model to the Model Optimizer with the following parameters: ```sh -python3 ./mo_tf.py ---input_model path_to_model/output_graph.pb \ ---freeze_placeholder_with_value input_lengths->[16] \ ---input input_node,previous_state_h/read,previous_state_c/read \ ---input_shape [1,16,19,26],[1,2048],[1,2048] \ +python3 ./mo_tf.py \ +--input_model path_to_model/output_graph.pb \ +--freeze_placeholder_with_value input_lengths->[16] \ +--input input_node,previous_state_h/read,previous_state_c/read \ +--input_shape [1,16,19,26],[1,2048],[1,2048] \ --output raw_logits,lstm_fused_cell/GatherNd,lstm_fused_cell/GatherNd_1 \ --disable_nhwc_to_nchw ``` diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md index 6362f018132..b78ec640cba 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_EfficientDet_Models.md @@ -1,6 +1,6 @@ # Converting EfficientDet Models from TensorFlow {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_EfficientDet_Models} -This tutorial explains how to convert detection EfficientDet\* public models to the Intermediate Representation (IR). +This tutorial explains how to convert EfficientDet\* public object detection models to the Intermediate Representation (IR). ## Convert EfficientDet Model to IR @@ -24,10 +24,11 @@ git checkout 96e1fee 3. Install required dependencies:
```sh python3 -m pip install --upgrade pip -python3 -m pip install -r automl/efficientdet/requirements.txt +python3 -m pip install -r requirements.txt +python3 -m pip install --upgrade tensorflow-model-optimization ``` 4. Download and extract the model checkpoint [efficientdet-d4.tar.gz](https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz) -referenced in the "Pretrained EfficientDet Checkpoints" section of the model repository:
+referenced in the "Pretrained EfficientDet Checkpoints" section of the model repository:
```sh wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d4.tar.gz tar zxvf efficientdet-d4.tar.gz @@ -46,9 +47,9 @@ As a result the frozen model file `savedmodeldir/efficientdet-d4_frozen.pb` will To generate the IR of the EfficientDet TensorFlow model, run:
```sh -python3 $MO_ROOT/mo.py \ +python3 $INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/mo.py \ --input_model savedmodeldir/efficientdet-d4_frozen.pb \ ---transformations_config $MO_ROOT/extensions/front/tf/automl_efficientdet.json \ +--transformations_config $INTEL_OPENVINO_DIR/deployment_tools/model_optimizer/extensions/front/tf/automl_efficientdet.json \ --input_shape [1,$IMAGE_SIZE,$IMAGE_SIZE,3] \ --reverse_input_channels ``` diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md index 587e2f53db3..72fe1db6aa8 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md @@ -137,7 +137,7 @@ index f5823d8..a733748 100644 ``` 3. Save and close the file. -## Convert GNMT Model to the IR +## Convert GNMT Model to IR > **NOTE**: Please, use TensorFlow version 1.13 or lower. @@ -155,7 +155,7 @@ git checkout b278487980832417ad8ac701c672b5c3dc7fa553 **Step 2**. Get a trained model. You have two options: * Train the model with the GNMT `wmt16_gnmt_4_layer.json` or `wmt16_gnmt_8_layer.json` configuration file using the NMT framework. -* Use the pretrained checkpoints provided in the NMT repository. Refer to the [Benchmarks](https://github.com/tensorflow/nmt#benchmarks) section for more information (*checkpoints in this section are outdated and can be incompatible with the current repository version. To avoid confusion, train a model by yourself*). +* *Do not use the pre-trained checkpoints provided in the NMT repository, as they are outdated and can be incompatible with the current repository version.* This tutorial assumes the use of the trained GNMT model from `wmt16_gnmt_4_layer.json` config, German to English translation. diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_NCF_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_NCF_From_Tensorflow.md index 6e03abe921d..c9526ef9da0 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_NCF_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_NCF_From_Tensorflow.md @@ -2,8 +2,7 @@ This tutorial explains how to convert Neural Collaborative Filtering (NCF) model to Intermediate Representation (IR). -[Public TensorFlow NCF model](https://github.com/tensorflow/models/tree/master/official/recommendation) does not contain - pretrained weights. To convert this model to the IR: +[Public TensorFlow NCF model](https://github.com/tensorflow/models/tree/master/official/recommendation) does not contain pre-trained weights. To convert this model to the IR: 1. Use [the instructions](https://github.com/tensorflow/models/tree/master/official/recommendation#train-and-evaluate-model) from this repository to train the model. 2. Freeze the inference graph you get on previous step in `model_dir` following the instructions from the Freezing Custom Models in Python* section of @@ -24,25 +23,27 @@ graph_io.write_graph(frozen, './', 'inference_graph.pb', as_text=False) where `rating/BiasAdd` is an output node. 3. Convert the model to the IR.If you look at your frozen model, you can see that -it has one input that is split to four `ResourceGather` layers. +it has one input that is split into four `ResourceGather` layers. (Click image to zoom in.) 
![NCF model beginning](../../../img/NCF_start.png) But as the Model Optimizer does not support such data feeding, you should skip it. Cut the edges incoming in `ResourceGather`s port 1: ```sh -python3 mo_tf.py --input_model inference_graph.pb \ ---input 1:embedding/embedding_lookup,1:embedding_1/embedding_lookup,\ -1:embedding_2/embedding_lookup,1:embedding_3/embedding_lookup \ ---input_shape [256],[256],[256],[256] +python3 mo_tf.py --input_model inference_graph.pb \ +--input 1:embedding/embedding_lookup,1:embedding_1/embedding_lookup, \ +1:embedding_2/embedding_lookup,1:embedding_3/embedding_lookup \ +--input_shape [256],[256],[256],[256] \ +--output_dir ``` -Where 256 is a `batch_size` you choose for your model. +In the `input_shape` parameter, 256 specifies the `batch_size` for your model. Alternatively, you can do steps 2 and 3 in one command line: ```sh -python3 mo_tf.py --input_meta_graph /path/to/model/model.meta \ ---input 1:embedding/embedding_lookup,1:embedding_1/embedding_lookup,\ -1:embedding_2/embedding_lookup,1:embedding_3/embedding_lookup \ ---input_shape [256],[256],[256],[256] --output rating/BiasAdd +python3 mo_tf.py --input_meta_graph /path/to/model/model.meta \ +--input 1:embedding/embedding_lookup,1:embedding_1/embedding_lookup, \ +1:embedding_2/embedding_lookup,1:embedding_3/embedding_lookup \ +--input_shape [256],[256],[256],[256] --output rating/BiasAdd \ +--output_dir ``` diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md index 6683d6b9b8a..6feec5f627a 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md @@ -8,8 +8,7 @@ With 2018 R3 release, the Model Optimizer introduces a new approach to convert models created using the TensorFlow\* Object Detection API. Compared with the previous approach, the new process produces inference results with higher accuracy and does not require modifying any configuration files and providing intricate command line parameters. -You can download TensorFlow\* Object Detection API models from the TensorFlow 1 Detection Model Zoo -or TensorFlow 2 Detection Model Zoo. +You can download TensorFlow\* Object Detection API models from the TensorFlow 1 Detection Model Zoo or TensorFlow 2 Detection Model Zoo. NOTE: Before converting, make sure you have configured the Model Optimizer. For configuration steps, refer to [Configuring the Model Optimizer](../../Config_Model_Optimizer.md). @@ -56,7 +55,7 @@ For example, if you downloaded the [pre-trained SSD InceptionV2 topology](http:/ ``` ## Custom Input Shape -Model Optimizer handles command line parameter `--input_shape` for TensorFlow\* Object Detection API models in a special way depending on the image resizer type defined in the `pipeline.config` file. TensorFlow\* Object Detection API generates different `Preprocessor` sub-graph based on the image resizer type. Model Optimizer supports two types of image resizer: +Model Optimizer handles the command line parameter `--input_shape` for TensorFlow\* Object Detection API models in a special way depending on the image resizer type defined in the `pipeline.config` file. TensorFlow\* Object Detection API generates different `Preprocessor` sub-graph based on the image resizer type. 
Model Optimizer supports two types of image resizer: * `fixed_shape_resizer` --- *Stretches* input image to the specific height and width. The `pipeline.config` snippet below shows a `fixed_shape_resizer` sample definition: ``` image_resizer { diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md index 84821d6b41c..ba781f17880 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_WideAndDeep_Family_Models.md @@ -19,6 +19,9 @@ git clone https://github.com/tensorflow/models.git --branch r2.2.0; cd official/r1/wide_deep ``` +The Wide and Deep model is no longer in the master branch of the repository but is still available in the r2.2.0 branch. + + **Step 2**. Train the model As the OpenVINO™ toolkit does not support the categorical with hash and crossed features, such feature types must be switched off in the model diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md index 493f05ba854..cc121ab19e1 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_XLNet_From_Tensorflow.md @@ -35,7 +35,7 @@ To get pb-file from the archive contents, you need to do the following. -2. Save and run the following script: +2. Save and run the following Python script in `~/XLNet-Base/xlnet`: ```python from collections import namedtuple @@ -92,7 +92,6 @@ with tf.Session() as sess: writer.flush() ``` -The script should save into `~/XLNet-Base/xlnet`. ## Download the Pre-Trained Large XLNet Model @@ -120,7 +119,7 @@ To get pb-file from the archive contents, you need to do the following. -2. Save and run the following script: +2. Save and run the following Python script in `~/XLNet-Large/xlnet`: ```python from collections import namedtuple @@ -185,6 +184,6 @@ The script should save into `~/XLNet-Large/xlnet`. To generate the XLNet Intermediate Representation (IR) of the model, run the Model Optimizer with the following parameters: ```sh -python3 mo.py --input_model path-to-model/model_frozen.pb --input "input_mask[50 1],input_ids[50 1],seg_ids[50 1]" --log_level DEBUG --disable_nhwc_to_nchw +python3 mo.py --input_model path-to-model/model_frozen.pb --input "input_mask[50 1],input_ids[50 1],seg_ids[50 1]" --log_level DEBUG --disable_nhwc_to_nchw --output_dir ``` diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md index 109714dcea6..653165576ce 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_YOLO_From_Tensorflow.md @@ -35,7 +35,7 @@ cd tensorflow-yolo-v3 git checkout ed60b90 ``` 3. Download [coco.names](https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names) file from the DarkNet website **OR** use labels that fit your task. -4. 
Download the [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) (for the YOLOv3 model) or [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) (for the YOLOv3-tiny model) file **OR** use your pretrained weights with the same structure +4. Download the [yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) (for the YOLOv3 model) or [yolov3-tiny.weights](https://pjreddie.com/media/files/yolov3-tiny.weights) (for the YOLOv3-tiny model) file **OR** use your pre-trained weights with the same structure 5. Run a converter: - for YOLO-v3: ```sh @@ -89,18 +89,20 @@ where: To generate the IR of the YOLOv3 TensorFlow model, run:
```sh -python3 mo_tf.py ---input_model /path/to/yolo_v3.pb ---transformations_config $MO_ROOT/extensions/front/tf/yolo_v3.json ---batch 1 +python3 mo_tf.py \ +--input_model /path/to/yolo_v3.pb \ +--transformations_config $MO_ROOT/extensions/front/tf/yolo_v3.json \ +--batch 1 \ +--output_dir ``` To generate the IR of the YOLOv3-tiny TensorFlow model, run:
```sh -python3 mo_tf.py ---input_model /path/to/yolo_v3_tiny.pb ---transformations_config $MO_ROOT/extensions/front/tf/yolo_v3_tiny.json ---batch 1 +python3 mo_tf.py \ +--input_model /path/to/yolo_v3_tiny.pb \ +--transformations_config $MO_ROOT/extensions/front/tf/yolo_v3_tiny.json \ +--batch 1 \ +--output_dir ``` where: diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md index 99b4cd703c1..cda8458e4dd 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md @@ -32,8 +32,7 @@ - Generic Back Phase Transformations - See Also -Model Optimizer extensibility mechanism enables support of new operations and custom transformations to generate the -optimized intermediate representation (IR) as described in the +Model Optimizer extensibility mechanism enables support of new operations and custom transformations to generate the optimized intermediate representation (IR) as described in the [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../../IR_and_opsets.md). This mechanism is a core part of the Model Optimizer. The Model Optimizer itself uses it under the hood, being a huge set of examples on how to add custom logic to support your model. @@ -42,9 +41,8 @@ There are several cases when the customization is needed: * A model contains operation(s) not known for the Model Optimizer, but these operation(s) could be expressed as a combination of supported operations. In this case, a custom transformation should be implemented to replace unsupported operation(s) with supported ones. -* A model contains sub-graph of operations that can be replaced with a smaller number of operations to get the better -performance. This example corresponds to so called fusing transformations. For example, replace a sub-graph performing -the following calculation \f$x / (1.0 + e^{-(beta * x)})\f$ with a single operation of type +* A model contains a sub-graph of operations that can be replaced with a smaller number of operations to get better +performance. This example corresponds to so-called *fusing transformations*, for example, replacing a sub-graph performing the calculation \f$x / (1.0 + e^{-(beta * x)})\f$ with a single operation of type [Swish](../../../ops/activation/Swish_4.md). * A model contains a custom framework operation (the operation that is not a part of an official operation set of the framework) that was developed using the framework extensibility mechanism. 
In this case, the Model Optimizer should know diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_MXNet_Model_Optimizer_with_New_Primitives.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_MXNet_Model_Optimizer_with_New_Primitives.md index aa3b5697242..e20a44969cf 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_MXNet_Model_Optimizer_with_New_Primitives.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_MXNet_Model_Optimizer_with_New_Primitives.md @@ -1,12 +1,11 @@ # Extending Model Optimizer for Custom MXNet* Operations {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_MXNet_Model_Optimizer_with_New_Primitives} -This section provides instruction on how to support a custom MXNet operation (or as it called in the MXNet documentation -"operator" or "layer") which is not a part of the MXNet operation set. For example, if the operator is implemented using -the following [guide](https://mxnet.apache.org/versions/1.7.0/api/faq/new_op.html). +This section provides instruction on how to support a custom MXNet operation (in the MXNet documentation, called an *operator* or *layer*) that is not part of the MXNet operation set. Creating custom operations is described in +[this guide](https://mxnet.apache.org/versions/1.7.0/api/faq/new_op.html). This section describes a procedure on how to extract operator attributes in the Model Optimizer. The rest of the -operation enabling pipeline and documentation on how to support MXNet operations from standard MXNet operation set is -described in the main document [Customize_Model_Optimizer](Customize_Model_Optimizer.md). +operation-enabling pipeline and documentation on how to support MXNet operations from standard MXNet operation set is +described in the main [Customize_Model_Optimizer](Customize_Model_Optimizer.md) document. ## Writing Extractor for Custom MXNet Operation Custom MXNet operations have an attribute `op` (defining the type of the operation) equal to `Custom` and attribute diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md index c79da3ef0ef..e4a71a8fdc9 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md @@ -1,10 +1,9 @@ # Extending Model Optimizer with Caffe* Python Layers {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_With_Caffe_Python_Layers} This section provides instruction on how to support a custom Caffe operation written only in Python. For example, the -[Faster-R-CNN model]((http://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0)) implemented in -Caffe contains a custom layer Proposal written in Python. The layer is described in the -[Faster-R-CNN protoxt](https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/models/pascal_voc/VGG16/faster_rcnn_end2end/test.prototxt) -the following way: +[Faster-R-CNN model](http://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0) implemented in +Caffe contains a custom proposal layer written in Python. 
The layer is described in the +[Faster-R-CNN prototxt](https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/models/pascal_voc/VGG16/faster_rcnn_end2end/test.prototxt) in the following way: ```sh layer { name: 'proposal' diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md index 9fb0e9b26f2..bb7ef070f38 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md @@ -1,3 +1,3 @@ -# Extending Model Optimizer with New Primitives {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_with_New_Primitives} +# [DEPRECATED] Extending Model Optimizer with New Primitives {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_with_New_Primitives} -This page is deprecated. Please, refer to [Model Optimizer Extensibility](Customize_Model_Optimizer.md) page for more information. +This page is deprecated. Please refer to [Model Optimizer Extensibility](Customize_Model_Optimizer.md) page for more information. diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md index c106d489ea8..6b047819461 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md @@ -1,6 +1,6 @@ -# Legacy Mode for Caffe* Custom Layers {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Legacy_Mode_for_Caffe_Custom_Layers} +# [DEPRECATED] Legacy Mode for Caffe* Custom Layers {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Legacy_Mode_for_Caffe_Custom_Layers} -> **NOTE**: This functionality is deprecated and will be removed in the future releases. +> **NOTE: This functionality is deprecated and will be removed in future releases.** Model Optimizer can register custom layers in a way that the output shape is calculated by the Caffe\* framework installed on your system. This approach has several limitations: diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md index 70bec8bdb4f..4883d2f3e09 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md @@ -1,4 +1,4 @@ -# Sub-Graph Replacement in the Model Optimizer {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Subgraph_Replacement_Model_Optimizer} +# [DEPRECATED] Sub-Graph Replacement in the Model Optimizer {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Subgraph_Replacement_Model_Optimizer} The document has been deprecated. Refer to the [Model Optimizer Extensibility](Customize_Model_Optimizer.md) for the up-to-date documentation. 
diff --git a/docs/get_started/get_started_dl_workbench.md b/docs/get_started/get_started_dl_workbench.md index 795767f3c73..701f23f66d6 100644 --- a/docs/get_started/get_started_dl_workbench.md +++ b/docs/get_started/get_started_dl_workbench.md @@ -10,7 +10,7 @@ In this guide, you will: [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a web-based graphical environment that enables you to easily use various sophisticated OpenVINO™ toolkit components: * [Model Downloader](@ref omz_tools_downloader) to download models from the [Intel® Open Model Zoo](@ref omz_models_group_intel) -with pretrained models for a range of different tasks +with pre-trained models for a range of different tasks * [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) to transform models into the Intermediate Representation (IR) format * [Post-Training Optimization toolkit](@ref pot_README) to calibrate a model and then execute it in the @@ -70,7 +70,7 @@ The simplified OpenVINO™ DL Workbench workflow is: ## Run Baseline Inference -This section illustrates a sample use case of how to infer a pretrained model from the [Intel® Open Model Zoo](@ref omz_models_group_intel) with an autogenerated noise dataset on a CPU device. +This section illustrates a sample use case of how to infer a pre-trained model from the [Intel® Open Model Zoo](@ref omz_models_group_intel) with an autogenerated noise dataset on a CPU device. \htmlonly \endhtmlonly @@ -82,7 +82,7 @@ Once you log in to the DL Workbench, create a project, which is a combination of On the the **Active Projects** page, click **Create** to open the **Create Project** page: ![](./dl_workbench_img/create_configuration.png) -### Step 2. Choose a Pretrained Model +### Step 2. Choose a Pre-trained Model Click **Import** next to the **Model** table on the **Create Project** page. The **Import Model** page opens. Select the squeezenet1.1 model from the Open Model Zoo and click **Import**. ![](./dl_workbench_img/import_model_02.png) diff --git a/docs/get_started/get_started_linux.md b/docs/get_started/get_started_linux.md index 3aa945a05a1..d64d63ed2fc 100644 --- a/docs/get_started/get_started_linux.md +++ b/docs/get_started/get_started_linux.md @@ -94,6 +94,13 @@ The script:
Click for an example of running the Image Classification demo script +To preview the image that the script will classify: + +```sh +cd ${INTEL_OPENVINO_DIR}/deployment_tools/demo +eog car.png +``` + To run the script to perform inference on a CPU: ```sh @@ -173,11 +180,12 @@ The script:
Click for an example of running the Benchmark demo script -To run the script that performs inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs: +To run the script that performs inference (runs on CPU by default): ```sh -./demo_squeezenet_download_convert_run.sh -d HDDL +./demo_benchmark_app.sh ``` + When the verification script completes, you see the performance counters, resulting latency, and throughput values displayed on the screen.
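If you want to target a device other than the CPU, the demo scripts also accept a `-d` flag. Assuming `demo_benchmark_app.sh` supports the same `-d` option as the other demo scripts and the GPU plugin is configured on your system, a run on Intel® Integrated Graphics would look like this (an illustrative sketch, not a step from the original guide):
```sh
# assumes demo_benchmark_app.sh accepts the same -d device option as the other demo scripts
./demo_benchmark_app.sh -d GPU
```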
@@ -514,6 +522,24 @@ source /opt/intel/openvino_2021/bin/setupvars.sh ## Typical Code Sample and Demo Application Syntax Examples +This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages. + +To build all the demos and samples: + +```sh +cd $INTEL_OPENVINO_DIR/inference_engine_samples/cpp +# to compile C samples, go here also: cd /inference_engine/samples/c +build_samples.sh +cd $INTEL_OPENVINO_DIR/deployment_tools/open_model_zoo/demos +build_demos.sh +``` + +Depending on what you compiled, executables are in the directories below: + +* `~/inference_engine_samples_build/intel64/Release` +* `~/inference_engine_cpp_samples_build/intel64/Release` +* `~/inference_engine_demos_build/intel64/Release` + Template to call sample code or a demo application: ```sh diff --git a/docs/get_started/get_started_macos.md b/docs/get_started/get_started_macos.md index 980b02d0be2..a15240a1c9b 100644 --- a/docs/get_started/get_started_macos.md +++ b/docs/get_started/get_started_macos.md @@ -95,9 +95,10 @@ The script:
Click for an example of running the Image Classification demo script -To run the script to perform inference on a CPU: +To run the script to view the sample image and perform inference on the CPU: ```sh +open car.png ./demo_squeezenet_download_convert_run.sh ``` @@ -171,7 +172,7 @@ The script: To run the script that performs inference on a CPU: ```sh -./demo_squeezenet_download_convert_run.sh +./demo_benchmark_app.sh ``` When the verification script completes, you see the performance counters, resulting latency, and throughput values displayed on the screen.
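On macOS, inference can run on the CPU or on an Intel® Neural Compute Stick 2 (MYRIAD plugin). Assuming the benchmark script accepts the same `-d` device option as the other demo scripts, a run on a connected Neural Compute Stick 2 would look like this (an illustrative sketch only):
```sh
# assumes demo_benchmark_app.sh accepts the same -d device option as the other demo scripts
./demo_benchmark_app.sh -d MYRIAD
```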
@@ -210,7 +211,7 @@ You must have a model that is specific for you inference task. Example model typ - Custom (Often based on SSD) Options to find a model suitable for the OpenVINO™ toolkit are: -- Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader). +- Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using the [Model Downloader tool](@ref omz_tools_downloader). - Download from GitHub*, Caffe* Zoo, TensorFlow* Zoo, and other resources. - Train your own model. @@ -312,6 +313,8 @@ Models in the Intermediate Representation format always include a pair of `.xml` - **REQUIRED:** `model_name.xml` - **REQUIRED:** `model_name.bin` +The conversion may also create a `model_name.mapping` file, but it is not needed for running inference. + This guide uses the public SqueezeNet 1.1 Caffe\* model to run the Image Classification Sample. See the example to download a model in the Download Models section to learn how to download this model. The `squeezenet1.1` model is downloaded in the Caffe* format. You must use the Model Optimizer to convert the model to the IR. @@ -376,7 +379,7 @@ To run the **Image Classification** code sample with an input image on the IR: ``` 3. Run the code sample executable, specifying the input media file, the IR of your model, and a target device on which you want to perform inference: ```sh - classification_sample_async -i -m -d + ./classification_sample_async -i -m -d ```
Click for examples of running the Image Classification code sample on different devices @@ -473,6 +476,24 @@ source /opt/intel/openvino_2021/bin/setupvars.sh ## Typical Code Sample and Demo Application Syntax Examples +This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.13 or later installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages. + +To build all the demos and samples: + +```sh +cd $INTEL_OPENVINO_DIR/inference_engine_samples/cpp +# to compile C samples, go here also: cd /inference_engine/samples/c +build_samples.sh +cd $INTEL_OPENVINO_DIR/deployment_tools/open_model_zoo/demos +build_demos.sh +``` + +Depending on what you compiled, executables are in the directories below: + +* `~/inference_engine_samples_build/intel64/Release` +* `~/inference_engine_cpp_samples_build/intel64/Release` +* `~/inference_engine_demos_build/intel64/Release` + Template to call sample code or a demo application: ```sh @@ -482,8 +503,8 @@ Template to call sample code or a demo application: With the sample information specified, the command might look like this: ```sh -./object_detection_demo_ssd_async -i ~/Videos/catshow.mp4 \ --m ~/ir/fp32/mobilenet-ssd.xml -d CPU +cd $INTEL_OPENVINO_DIR/deployment_tools/open_model_zoo/demos/object_detection_demo +./object_detection_demo -i ~/Videos/catshow.mp4 -m ~/ir/fp32/mobilenet-ssd.xml -d CPU ``` ## Advanced Demo Use diff --git a/docs/get_started/get_started_windows.md b/docs/get_started/get_started_windows.md index c8c7ee23d1f..253af476efb 100644 --- a/docs/get_started/get_started_windows.md +++ b/docs/get_started/get_started_windows.md @@ -96,6 +96,8 @@ The script: To run the script to perform inference on a CPU: +1. Open the `car.png` file in any image viewer to see what the demo will be classifying. +2. Run the following script: ```bat .\demo_squeezenet_download_convert_run.bat ``` @@ -167,10 +169,10 @@ The script:
Click for an example of running the Benchmark demo script -To run the script that performs inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs: +To run the script that performs inference (runs on CPU by default): ```bat -.\demo_squeezenet_download_convert_run.bat -d HDDL +.\demo_benchmark_app.bat ``` When the verification script completes, you see the performance counters, resulting latency, and throughput values displayed on the screen.
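As on the other platforms, a different device can be requested with the `-d` flag. Assuming `demo_benchmark_app.bat` accepts the same option as the other demo scripts and the corresponding plugin is configured, a GPU run would look like this (an illustrative sketch only):
```bat
rem assumes demo_benchmark_app.bat accepts the same -d device option as the other demo scripts
.\demo_benchmark_app.bat -d GPU
```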
@@ -482,6 +484,24 @@ Below you can find basic guidelines for executing the OpenVINO™ workflow using ## Typical Code Sample and Demo Application Syntax Examples +This section explains how to build and use the sample and demo applications provided with the toolkit. You will need CMake 3.10 or later and Microsoft Visual Studio 2017 or 2019 installed. Build details are on the [Inference Engine Samples](../IE_DG/Samples_Overview.md) and [Demo Applications](@ref omz_demos_README) pages. + +To build all the demos and samples: + +```sh +cd $INTEL_OPENVINO_DIR\inference_engine_samples\cpp +# to compile C samples, go here also: cd \inference_engine\samples\c +build_samples_msvc.bat +cd $INTEL_OPENVINO_DIR\deployment_tools\open_model_zoo\demos +build_demos_msvc.bat +``` + +Depending on what you compiled, executables are in the directories below: + +* `C:\Users\\Documents\Intel\OpenVINO\inference_engine_c_samples_build\intel64\Release` +* `C:\Users\\Documents\Intel\OpenVINO\inference_engine_cpp_samples_build\intel64\Release` +* `C:\Users\\Documents\Intel\OpenVINO\omz_demos_build\intel64\Release` + Template to call sample code or a demo application: ```bat diff --git a/docs/index.md b/docs/index.md index ee0739a1e1e..15d0c2a3a2b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -18,8 +18,8 @@ The following diagram illustrates the typical OpenVINO™ workflow (click to see ### Model Preparation, Conversion and Optimization -You can use your framework of choice to prepare and train a Deep Learning model or just download a pretrained model from the Open Model Zoo. The Open Model Zoo includes Deep Learning solutions to a variety of vision problems, including object recognition, face recognition, pose estimation, text detection, and action recognition, at a range of measured complexities. -Several of these pretrained models are used also in the [code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos). To download models from the Open Model Zoo, the [Model Downloader](@ref omz_tools_downloader) tool is used. +You can use your framework of choice to prepare and train a deep learning model or just download a pre-trained model from the Open Model Zoo. The Open Model Zoo includes deep learning solutions to a variety of vision problems, including object recognition, face recognition, pose estimation, text detection, and action recognition, at a range of measured complexities. +Several of these pre-trained models are used also in the [code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos_README). To download models from the Open Model Zoo, the [Model Downloader](@ref omz_tools_downloader_README) tool is used. One of the core component of the OpenVINO™ toolkit is the [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) a cross-platform command-line tool that converts a trained neural network from its source framework to an open-source, nGraph-compatible [Intermediate Representation (IR)](MO_DG/IR_and_opsets.md) for use in inference operations. The Model Optimizer imports models trained in popular frameworks such as Caffe*, TensorFlow*, MXNet*, Kaldi*, and ONNX* and performs a few optimizations to remove excess layers and group operations when possible into simpler, faster graphs. 
@@ -49,7 +49,7 @@ For a full browser-based studio integrating these other key tuning utilities, tr OpenVINO™ toolkit includes a set of [inference code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos) showing how inference is run and output processed for use in retail environments, classrooms, smart camera applications, and other solutions. -OpenVINO also makes use of open-Source and Intel™ tools for traditional graphics processing and performance management. Intel® Media SDK supports accelerated rich-media processing, including transcoding. OpenVINO™ optimizes calls to the rich OpenCV and OpenVX libraries for processing computer vision workloads. And the new DL Streamer integration further accelerates video pipelining and performance. +OpenVINO also makes use of open-source and Intel™ tools for traditional graphics processing and performance management. Intel® Media SDK supports accelerated rich-media processing, including transcoding. OpenVINO™ optimizes calls to the rich OpenCV and OpenVX libraries for processing computer vision workloads. And the new DL Streamer integration further accelerates video pipelining and performance. Useful documents for inference tuning: * [Inference Engine Developer Guide](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) @@ -82,22 +82,22 @@ The Inference Engine's plug-in architecture can be extended to meet other specia Intel® Distribution of OpenVINO™ toolkit includes the following components: -- [Deep Learning Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) - A cross-platform command-line tool for importing models and preparing them for optimal execution with the Inference Engine. The Model Optimizer imports, converts, and optimizes models, which were trained in popular frameworks, such as Caffe*, TensorFlow*, MXNet*, Kaldi*, and ONNX*. -- [Deep Learning Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) - A unified API to allow high performance inference on many hardware types including Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, Intel® Vision Accelerator Design with Intel® Movidius™ vision processing unit (VPU). -- [Inference Engine Samples](IE_DG/Samples_Overview.md) - A set of simple console applications demonstrating how to use the Inference Engine in your applications. -- [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) - A web-based graphical environment that allows you to easily use various sophisticated OpenVINO™ toolkit components. -- [Post-Training Optimization tool](@ref pot_README) - A tool to calibrate a model and then execute it in the INT8 precision. -- Additional Tools - A set of tools to work with your models including [Benchmark App](../inference-engine/tools/benchmark_tool/README.md), [Cross Check Tool](../inference-engine/tools/cross_check_tool/README.md), [Compile tool](../inference-engine/tools/compile_tool/README.md). +- [Deep Learning Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md): A cross-platform command-line tool for importing models and preparing them for optimal execution with the Inference Engine. The Model Optimizer imports, converts, and optimizes models, which were trained in popular frameworks, such as Caffe*, TensorFlow*, MXNet*, Kaldi*, and ONNX*. 
+- [Deep Learning Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md): A unified API to allow high performance inference on many hardware types including Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, Intel® Vision Accelerator Design with Intel® Movidius™ vision processing unit (VPU). +- [Inference Engine Samples](IE_DG/Samples_Overview.md): A set of simple console applications demonstrating how to use the Inference Engine in your applications. +- [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction): A web-based graphical environment that allows you to easily use various sophisticated OpenVINO™ toolkit components. +- [Post-Training Optimization tool](@ref pot_README): A tool to calibrate a model and then execute it in the INT8 precision. +- Additional Tools: A set of tools to work with your models including [Benchmark App](../inference-engine/tools/benchmark_tool/README.md), [Cross Check Tool](../inference-engine/tools/cross_check_tool/README.md), [Compile tool](../inference-engine/tools/compile_tool/README.md). - [Open Model Zoo](@ref omz_models_group_intel) - - [Demos](@ref omz_demos) - Console applications that provide robust application templates to help you implement specific deep learning scenarios. - - Additional Tools - A set of tools to work with your models including [Accuracy Checker Utility](@ref omz_tools_accuracy_checker) and [Model Downloader](@ref omz_tools_downloader). - - [Documentation for Pretrained Models](@ref omz_models_group_intel) - Documentation for pretrained models that are available in the [Open Model Zoo repository](https://github.com/opencv/open_model_zoo). -- Deep Learning Streamer (DL Streamer) – Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. DL Streamer can be installed by the Intel® Distribution of OpenVINO™ toolkit installer. Its open source version is available on [GitHub](https://github.com/opencv/gst-video-analytics). For the DL Streamer documentation, see: + - [Demos](@ref omz_demos): Console applications that provide robust application templates to help you implement specific deep learning scenarios. + - Additional Tools: A set of tools to work with your models including [Accuracy Checker Utility](@ref omz_tools_accuracy_checker) and [Model Downloader](@ref omz_tools_downloader). + - [Documentation for Pretrained Models](@ref omz_models_group_intel): Documentation for pre-trained models that are available in the [Open Model Zoo repository](https://github.com/opencv/open_model_zoo). +- Deep Learning Streamer (DL Streamer): Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. DL Streamer can be installed by the Intel® Distribution of OpenVINO™ toolkit installer. Its open-source version is available on [GitHub](https://github.com/opencv/gst-video-analytics). 
For the DL Streamer documentation, see: - [DL Streamer Samples](@ref gst_samples_README) - [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/) - [Elements](https://github.com/opencv/gst-video-analytics/wiki/Elements) - [Tutorial](https://github.com/opencv/gst-video-analytics/wiki/DL%20Streamer%20Tutorial) -- [OpenCV](https://docs.opencv.org/master/) - OpenCV* community version compiled for Intel® hardware +- [OpenCV](https://docs.opencv.org/master/) : OpenCV* community version compiled for Intel® hardware - [Intel® Media SDK](https://software.intel.com/en-us/media-sdk) (in Intel® Distribution of OpenVINO™ toolkit for Linux only) OpenVINO™ Toolkit opensource version is available on [GitHub](https://github.com/openvinotoolkit/openvino). For building the Inference Engine from the source code, see the build instructions. \ No newline at end of file diff --git a/docs/install_guides/PAC_Configure_2019RX.md b/docs/install_guides/PAC_Configure_2019RX.md index 5e43876ec20..150ca475d65 100644 --- a/docs/install_guides/PAC_Configure_2019RX.md +++ b/docs/install_guides/PAC_Configure_2019RX.md @@ -45,7 +45,7 @@ cd a10_gx_pac_ias_1_2_pv_rte_installer 4. Select **Y** to install OPAE and accept license and when asked, specify `/home//tools/intelrtestack` as the absolute install path. During the installation there should be a message stating the directory already exists as it was created in the first command above. Select **Y** to install to this directory. If this message is not seen, it suggests that there was a typo when entering the install location. 5. Tools are installed to the following directories: - * OpenCL™ Run-time Environment: `~/tools/intelrtestack/opencl_rte/aclrte-linux64` + * OpenCL™ Runtime Environment: `~/tools/intelrtestack/opencl_rte/aclrte-linux64` * Intel® Acceleration Stack for FPGAs: `~/tools/intelrtestack/a10_gx_pac_ias_1_2_pv` 7. Check the version of the FPGA Interface Manager firmware on the PAC board. diff --git a/docs/install_guides/deployment-manager-tool.md b/docs/install_guides/deployment-manager-tool.md index 837ce3263e2..0989a3d5929 100644 --- a/docs/install_guides/deployment-manager-tool.md +++ b/docs/install_guides/deployment-manager-tool.md @@ -22,8 +22,7 @@ The Deployment Manager is a Python\* command-line tool that is delivered within ## Create Deployment Package Using Deployment Manager -There are two ways to create a deployment package that includes inference-related components of the OpenVINO™ toolkit:
-You can run the Deployment Manager tool in either Interactive or Standard CLI mode. +There are two ways to create a deployment package that includes inference-related components of the OpenVINO™ toolkit: you can run the Deployment Manager tool in either interactive or standard CLI mode. ### Run Interactive Mode
@@ -71,7 +70,7 @@ The following options are available: ``` * `[--output_dir]` — (Optional) Path to the output directory. By default, it set to your home directory. -* `[--archive_name]` — (Optional) Deployment archive name without extension. By default, it set to `openvino_deployment_package`. +* `[--archive_name]` — (Optional) Deployment archive name without extension. By default, it is set to `openvino_deployment_package`. * `[--user_data]` — (Optional) Path to a directory with user data (IRs, models, datasets, etc.) required for inference. By default, it's set to `None`, which means that the user data are already present on the target host machine. diff --git a/docs/install_guides/installing-openvino-linux.md b/docs/install_guides/installing-openvino-linux.md index 955a50a0bae..a78fa8fc43d 100644 --- a/docs/install_guides/installing-openvino-linux.md +++ b/docs/install_guides/installing-openvino-linux.md @@ -284,13 +284,10 @@ The steps in this section are required only if you want to enable the toolkit co ```sh cd /opt/intel/openvino_2021/install_dependencies/ ``` -2. Enter the super user mode: + +2. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. The drivers are not included in the package, to install it, make sure you have the internet connection and run the installation script: ```sh -sudo -E su -``` -3. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. The drivers are not included in the package, to install it, make sure you have the internet connection and run the installation script: -```sh -./install_NEO_OCL_driver.sh +sudo -E ./install_NEO_OCL_driver.sh ``` The script compares the driver version on the system to the current version. If the driver version on the system is higher or equal to the current version, the script does not install a new driver. If the version of the driver is lower than the current version, the script uninstalls the lower and installs the current version with your permission: diff --git a/docs/install_guides/installing-openvino-macos.md b/docs/install_guides/installing-openvino-macos.md index 0797d625ca8..d878eac5c3a 100644 --- a/docs/install_guides/installing-openvino-macos.md +++ b/docs/install_guides/installing-openvino-macos.md @@ -24,7 +24,7 @@ The following components are installed by default: | Component | Description | | :-------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) | This tool imports, converts, and optimizes models, which were trained in popular frameworks, to a format usable by Intel tools, especially the Inference Engine.
Popular frameworks include Caffe*, TensorFlow*, MXNet\*, and ONNX\*. | -| [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) | This is the engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications. | +| [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) | This is the engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications | | [OpenCV\*](https://docs.opencv.org/master/) | OpenCV\* community version compiled for Intel® hardware | | [Sample Applications](../IE_DG/Samples_Overview.md) | A set of simple console applications demonstrating how to use the Inference Engine in your applications. | | [Demos](@ref omz_demos) | A set of console applications that demonstrate how you can use the Inference Engine in your applications to solve specific use-cases | @@ -59,10 +59,15 @@ The development and target platforms have the same requirements, but you can sel **Software Requirements** -- CMake 3.10 or higher -- Python 3.6 - 3.7 -- Apple Xcode\* Command Line Tools -- (Optional) Apple Xcode\* IDE (not required for OpenVINO, but useful for development) +* CMake 3.10 or higher + + [Install](https://cmake.org/download/) (choose "macOS 10.13 or later") + + Add `/Applications/CMake.app/Contents/bin` to path (for default install) +* Python 3.6 - 3.7 + + [Install](https://www.python.org/downloads/mac-osx/) (choose 3.6.x or 3.7.x, not latest) + + Add to path +* Apple Xcode\* Command Line Tools + + In the terminal, run `xcode-select --install` from any directory +* (Optional) Apple Xcode\* IDE (not required for OpenVINO, but useful for development) **Operating Systems** @@ -74,13 +79,13 @@ This guide provides step-by-step instructions on how to install the Intel® Dist The following steps will be covered: -1. Install the Intel® Distribution of OpenVINO™ Toolkit . +1. Install the Intel® Distribution of OpenVINO™ Toolkit. 2. Set the OpenVINO environment variables and (optional) Update to .bash_profile. 3. Configure the Model Optimizer. 4. Get Started with Code Samples and Demo Applications. 5. Uninstall the Intel® Distribution of OpenVINO™ Toolkit. -## Install the Intel® Distribution of OpenVINO™ toolkit Core Components +## Install the Intel® Distribution of OpenVINO™ Toolkit Core Components If you have a previous version of the Intel® Distribution of OpenVINO™ toolkit installed, rename or delete these two directories: @@ -125,15 +130,15 @@ The disk image is mounted to `/Volumes/m_openvino_toolkit_p_` and autom 9. If needed, click **Customize** to change the installation directory or the components you want to install: ![](../img/openvino-install-macos-04.png) > **NOTE**: If there is an OpenVINO™ toolkit version previously installed on your system, the installer will use the same destination directory for next installations. If you want to install a newer version to a different directory, you need to uninstall the previously installed versions. - Click **Next** to save the installation options and show the Installation summary screen. +10. Click **Next** to save the installation options and show the Installation summary screen. -10. On the **Installation summary** screen, press **Install** to begin the installation. +11. On the **Installation summary** screen, click **Install** to begin the installation. -11. When the first part of installation is complete, the final screen informs you that the core components have been installed +12. 
When the first part of installation is complete, the final screen informs you that the core components have been installed and additional steps still required: ![](../img/openvino-install-macos-05.png) -12. Click **Finish** to close the installation wizard. A new browser window opens to the next section of the Installation Guide to set the environment variables. If the installation did not indicate you must install dependencies, you can move ahead to [Set the Environment Variables](#set-the-environment-variables). If you received a message that you were missing external software dependencies, listed under **Software Requirements** at the top of this guide, you need to install them now before continuing on to the next section. +13. Click **Finish** to close the installation wizard. A new browser window opens to the next section of the Installation Guide to set the environment variables. If the installation did not indicate you must install dependencies, you can move ahead to [Set the Environment Variables](#set-the-environment-variables). If you received a message that you were missing external software dependencies, listed under **Software Requirements** at the top of this guide, you need to install them now before continuing on to the next section. ## Set the Environment Variables @@ -143,22 +148,26 @@ You need to update several environment variables before you can compile and run source /opt/intel/openvino_2021/bin/setupvars.sh ``` +If you didn't choose the default installation option, replace `/opt/intel/openvino_2021` with your directory. + Optional: The OpenVINO environment variables are removed when you close the shell. You can permanently set the environment variables as follows: 1. Open the `.bash_profile` file in the current user home directory: ```sh vi ~/.bash_profile ``` -2. Press the **i** key to switch to the insert mode. +2. Press the **i** key to switch to insert mode. 3. Add this line to the end of the file: ```sh source /opt/intel/openvino_2021/bin/setupvars.sh ``` -3. Save and close the file: press the **Esc** key, type `:wq` and press the **Enter** key. +If you didn't choose the default installation option, replace `/opt/intel/openvino_2021` with your directory. -4. To verify your change, open a new terminal. You will see `[setupvars.sh] OpenVINO environment initialized`. +4. Save and close the file: press the **Esc** key, type `:wq` and press the **Enter** key. + +5. To verify your change, open a new terminal. You will see `[setupvars.sh] OpenVINO environment initialized`. The environment variables are set. Continue to the next section to configure the Model Optimizer. @@ -264,13 +273,13 @@ Proceed to the Get Started to get started with runnin Now you are ready to get started. To continue, see the following pages: * [OpenVINO™ Toolkit Overview](../index.md) -* [Get Started Guide for Windows](../get_started/get_started_macos.md) to learn the basic OpenVINO™ toolkit workflow and run code samples and demo applications with pre-trained models on different inference devices. +* [Get Started Guide for macOS](../get_started/get_started_macos.md) to learn the basic OpenVINO™ toolkit workflow and run code samples and demo applications with pre-trained models on different inference devices. ## Uninstall the Intel® Distribution of OpenVINO™ Toolkit Follow the steps below to uninstall the Intel® Distribution of OpenVINO™ Toolkit from your system: -1. From the ``, locate and open `openvino_toolkit_uninstaller.app`. +1. 
From the installation directory (by default, `/opt/intel/openvino_2021`), locate and open `openvino_toolkit_uninstaller.app`.
 2. Follow the uninstallation wizard instructions.
 3. When uninstallation is complete, click **Finish**.
diff --git a/docs/install_guides/installing-openvino-raspbian.md b/docs/install_guides/installing-openvino-raspbian.md
index 0695ef9e772..14b354532e1 100644
--- a/docs/install_guides/installing-openvino-raspbian.md
+++ b/docs/install_guides/installing-openvino-raspbian.md
@@ -10,7 +10,11 @@
 The OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNN), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The OpenVINO toolkit includes the Intel® Deep Learning Deployment Toolkit (Intel® DLDT).
 
-The OpenVINO™ toolkit for Raspbian* OS includes the Inference Engine and the MYRIAD plugins. You can use it with the Intel® Neural Compute Stick 2 plugged in one of USB ports.
+The OpenVINO™ toolkit for Raspbian* OS includes the Inference Engine and the MYRIAD plugins. You can use it with the Intel® Neural Compute Stick 2 plugged into one of the USB ports. This device is required for using the Intel® Distribution of OpenVINO™ toolkit.
+
+> **NOTE**: There is also an open-source version of OpenVINO™ that can be compiled for arch64 (see [build instructions](https://github.com/openvinotoolkit/openvino/wiki/BuildingForRaspbianStretchOS)).
+
+Because OpenVINO for Raspbian* OS doesn't include Model Optimizer, the ideal scenario is to use another machine to convert your model with Model Optimizer, then do your application development on the Raspberry Pi* for a convenient build/test cycle on the target platform.
 
 ### Included in the Installation Package
 
@@ -31,10 +35,9 @@ The OpenVINO toolkit for Raspbian OS is an archive with pre-installed header fil
 **Hardware**
 
 - Raspberry Pi\* board with ARM* ARMv7-A CPU architecture. Check that `uname -m` returns `armv7l`.
-- One of Intel® Movidius™ Visual Processing Units (VPU):
-- Intel® Neural Compute Stick 2
+- Intel® Neural Compute Stick 2, which is one of the Intel® Movidius™ Visual Processing Units (VPUs)
 
-> **NOTE**: With OpenVINO™ 2020.4 release, Intel® Movidius™ Neural Compute Stick is no longer supported.
+> **NOTE**: With OpenVINO™ 2020.4 release, Intel® Movidius™ Neural Compute Stick (1) is no longer supported.
 
 **Operating Systems**
 
@@ -62,7 +65,7 @@ This guide provides step-by-step instructions on how to install the OpenVINO™
 The guide assumes you downloaded the OpenVINO toolkit for Raspbian* OS. If you do not have a copy of the toolkit package file `l_openvino_toolkit_runtime_raspbian_p_.tgz`, download the latest version from the [OpenVINO™ Toolkit packages storage](https://storage.openvinotoolkit.org/repositories/openvino/packages/) and then return to this guide to proceed with the installation.
 
-> **NOTE**: The OpenVINO toolkit for Raspbian OS is distributed without installer, so you need to perform extra steps comparing to the [Intel® Distribution of OpenVINO™ toolkit for Linux* OS](installing-openvino-linux.md).
+> **NOTE**: The OpenVINO toolkit for Raspbian OS is distributed without an installer, so you need to perform some extra steps compared to the [Intel® Distribution of OpenVINO™ toolkit for Linux* OS](installing-openvino-linux.md).
 
 1. Open the Terminal\* or your preferred console application.
 2. Go to the directory in which you downloaded the OpenVINO toolkit.
This document assumes this is your `~/Downloads` directory. If not, replace `~/Downloads` with the directory where the file is located. @@ -107,9 +110,8 @@ To test your change, open a new terminal. You will see the following: [setupvars.sh] OpenVINO environment initialized ``` -Continue to the next section to add USB rules for Intel® Neural Compute Stick 2 devices. - -## Add USB Rules +## Add USB Rules for an Intel® Neural Compute Stick 2 device +This task applies only if you have an Intel® Neural Compute Stick 2 device. 1. Add the current Linux user to the `users` group: ```sh @@ -126,11 +128,11 @@ Continue to the next section to add USB rules for Intel® Neural Compute Stick 2 ``` 4. Plug in your Intel® Neural Compute Stick 2. -You are ready to compile and run the Object Detection sample to verify the Inference Engine installation. +You are now ready to compile and run the Object Detection sample to verify the Inference Engine installation. ## Build and Run Object Detection Sample -Follow the next steps to run pre-trained Face Detection network using Inference Engine samples from the OpenVINO toolkit. +Follow the next steps to use the pre-trained face detection model using Inference Engine samples from the OpenVINO toolkit. 1. Navigate to a directory that you have write access to and create a samples build directory. This example uses a directory named `build`: ```sh @@ -150,7 +152,7 @@ Follow the next steps to run pre-trained Face Detection network using Inference python3 -m pip install -r requirements.in python3 downloader.py --name face-detection-adas-0001 ``` -4. Run the sample with specifying the model and a path to the input image: +4. Run the sample specifying the model, a path to the input image, and the VPU required to run with the Raspbian* OS: ```sh ./armv7l/Release/object_detection_sample_ssd -m face-detection-adas-0001.xml -d MYRIAD -i ``` diff --git a/docs/install_guides/installing-openvino-windows.md b/docs/install_guides/installing-openvino-windows.md index 56e963d1ea4..1a1a31a07c6 100644 --- a/docs/install_guides/installing-openvino-windows.md +++ b/docs/install_guides/installing-openvino-windows.md @@ -31,7 +31,7 @@ Your installation is complete when these are all completed: - Install the drivers and software for the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs - - Update Windows* environment variables + - Update Windows* environment variables (necessary if you didn't choose the option to add Python to the path when you installed Python) Also, the following steps will be covered in the guide: - Get Started with Code Samples and Demo Applications @@ -246,7 +246,7 @@ Or proceed to the Get Started to get started with run ### Optional: Additional Installation Steps for Intel® Processor Graphics (GPU) -> **NOTE**: These steps are required only if you want to use a GPU. +> **NOTE**: These steps are required only if you want to use an Intel® integrated GPU. If your applications offload computation to **Intel® Integrated Graphics**, you must have the latest version of Intel Graphics Driver for Windows installed for your hardware. [Download and install a higher version](http://downloadcenter.intel.com/product/80939/Graphics-Drivers). @@ -277,7 +277,7 @@ To perform inference on Intel® Vision Accelerator Design with Intel® Movidius 1. Download and install Visual C++ Redistributable for Visual Studio 2017 2. Check with a support engineer if your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs card requires SMBUS connection to PCIe slot (most unlikely). 
Install the SMBUS driver only if confirmed (by default, it's not required): - 1. Go to the `\deployment_tools\inference-engine\external\hddl\SMBusDriver` directory, where `` is the directory in which the Intel Distribution of OpenVINO toolkit is installed. + 1. Go to the `\deployment_tools\inference-engine\external\hddl\drivers\SMBusDriver` directory, where `` is the directory in which the Intel Distribution of OpenVINO toolkit is installed. 2. Right click on the `hddlsmbus.inf` file and choose **Install** from the pop up menu. You are done installing your device driver and are ready to use your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. @@ -313,7 +313,7 @@ Use these steps to update your Windows `PATH` if a command you execute returns a 7. Click **OK** repeatedly to close each screen. -Your `PATH` environment variable is updated. +Your `PATH` environment variable is updated. If the changes don't take effect immediately, you may need to reboot. ## Get Started diff --git a/docs/nGraph_DG/intro.md b/docs/nGraph_DG/intro.md index 032ecc8f610..f096321ef5d 100644 --- a/docs/nGraph_DG/intro.md +++ b/docs/nGraph_DG/intro.md @@ -11,17 +11,17 @@ Operations from these operation sets are generated by the Model Optimizer and ar 2. Operation version is attached to each operation rather than to the entire IR file format. IR is still versioned but has a different meaning. For details, see [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../MO_DG/IR_and_opsets.md). -3. Creating models in run-time without loading IR from an xml/binary file. You can enable it by creating +3. Creating models at runtime without loading IR from an xml/binary file. You can enable it by creating `ngraph::Function` passing it to `CNNNetwork`. -4. Run-time reshape capability and constant folding are implemented through the nGraph code for more operations compared to previous releases. +4. Runtime reshape capability and constant folding are implemented through the nGraph code for more operations compared to previous releases. As a result, more models can be reshaped. For details, see the [dedicated guide about the reshape capability](../IE_DG/ShapeInference.md). 5. Loading [model from ONNX format](../IE_DG/ONNX_Support.md) without converting it to the Inference Engine IR. 6. nGraph representation supports dynamic shapes. You can use `CNNNetwork::reshape()` method in order to specialize input shapes. -The complete picture of existed flow is presented below. +A complete picture of the existing flow is shown below. ![](img/TopLevelNGraphFlow.png) diff --git a/docs/nGraph_DG/nGraphTransformation.md b/docs/nGraph_DG/nGraphTransformation.md index 5e88ccdf12c..e46f0dd8a02 100644 --- a/docs/nGraph_DG/nGraphTransformation.md +++ b/docs/nGraph_DG/nGraphTransformation.md @@ -27,7 +27,7 @@ Transformation flow in the transformation library has several layers: 2. Transformations - Perform a particular transformation algorithm on `ngraph::Function`. 3. Low-level functions - Take a set of nodes and perform some transformation action. They are not mandatory and all transformation code can be located inside the transformation. -But if some transformation parts can potentially be reused in other transformations, we suggest keeping them as a separate functions. +But if some transformation parts can potentially be reused in other transformations, we suggest keeping them as separate functions. 
### Location for Your Transformation Code To decide where to store your transformation code, please follow these rules: diff --git a/docs/nGraph_DG/nGraph_basic_concepts.md b/docs/nGraph_DG/nGraph_basic_concepts.md index 2d6bed70272..4648c2613eb 100644 --- a/docs/nGraph_DG/nGraph_basic_concepts.md +++ b/docs/nGraph_DG/nGraph_basic_concepts.md @@ -4,8 +4,8 @@ The nGraph represents neural networks in uniform format. User can create differe ## nGraph Function and Graph Representation -nGraph function is a very simple thing: it stores shared pointers to `ngraph::op::Parameter`, `ngraph::op::Result` and `ngraph::op::Sink` operations that are inputs, outputs and sinks of the graph. -Sinks of the graph have no consumers and not included into results vector. All other operations hold each other via shared pointers: child operation holds its parent (hard link). If operation has no consumers and it's not Result or Sink operation +nGraph function is a very simple thing: it stores shared pointers to `ngraph::op::Parameter`, `ngraph::op::Result` and `ngraph::op::Sink` operations that are inputs, outputs and sinks of the graph. +Sinks of the graph have no consumers and are not included in the results vector. All other operations hold each other via shared pointers: child operation holds its parent (hard link). If operation has no consumers and it's not Result or Sink operation (shared pointer counter is zero) then it will be destructed and won't be accessible anymore. Each operation in `ngraph::Function` has a `std::shared_ptr` type. For details on how to build an nGraph Function, see the [Build nGraph Function](./build_function.md) page. diff --git a/docs/optimization_guide/dldt_optimization_guide.md b/docs/optimization_guide/dldt_optimization_guide.md index 87fb3d26b4d..58c4ba57064 100644 --- a/docs/optimization_guide/dldt_optimization_guide.md +++ b/docs/optimization_guide/dldt_optimization_guide.md @@ -275,7 +275,7 @@ The following tips are provided to give general guidance on optimizing execution - The general affinity “rule of thumb” is to keep computationally-intensive kernels on the accelerator, and "glue" (or helper) kernels on the CPU. Notice that this includes the granularity considerations. For example, running some (custom) activation on the CPU would result in too many conversions. -- It is advised to do performance analysis to determine “hotspot” kernels, which should be the first candidates for offloading. At the same time, it is often more efficient to offload some reasonably sized sequence of kernels, rather than individual kernels, to minimize scheduling and other run-time overheads. +- It is advised to do performance analysis to determine “hotspot” kernels, which should be the first candidates for offloading. At the same time, it is often more efficient to offload some reasonably sized sequence of kernels, rather than individual kernels, to minimize scheduling and other runtime overhead. - Notice that GPU can be busy with other tasks (like rendering). Similarly, the CPU can be in charge for the general OS routines and other application threads (see Note on the App-Level Threading). Also, a high interrupt rate due to many subgraphs can raise the frequency of the one device and drag the frequency of another down. diff --git a/docs/ovsa/ovsa_get_started.md b/docs/ovsa/ovsa_get_started.md index 19678297eb7..9d19ee63eb1 100644 --- a/docs/ovsa/ovsa_get_started.md +++ b/docs/ovsa/ovsa_get_started.md @@ -152,7 +152,7 @@ You're ready to configure the Host Machine for networking. 
This step is for the combined Model Developer and Independent Software Vendor roles. If Model User VM is running on different physical host, repeat the following steps for that host also. In this step you prepare two network bridges: -* A global IP address that a KVM can access across the Internet. This is the address that the OpenVINO™ Security Add-on Run-time software on a user's machine uses to verify they have a valid license. +* A global IP address that a KVM can access across the Internet. This is the address that the OpenVINO™ Security Add-on runtime software on a user's machine uses to verify they have a valid license. * A host-only local address to provide communication between the Guest VM and the QEMU host operating system. This example in this step uses the following names. Your configuration might use different names: diff --git a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md index 7370b6ab61f..727d39ab270 100644 --- a/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md +++ b/inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md @@ -76,7 +76,7 @@ Options: > > - The sample accepts models in ONNX format (.onnx) that do not require preprocessing. -For example, to do inference on a CPU with the OpenVINO™ toolkit person detection SSD models, run one of the following commands: +For example, to perform inference on a CPU with the OpenVINO™ toolkit person detection SSD models, run one of the following commands: - with one image and [person-detection-retail-0013](https://docs.openvinotoolkit.org/latest/omz_models_intel_person_detection_retail_0013_description_person_detection_retail_0013.html) model diff --git a/inference-engine/ie_bridges/python/docs/api_overview.md b/inference-engine/ie_bridges/python/docs/api_overview.md index a2fbea2ea58..577edcc080c 100644 --- a/inference-engine/ie_bridges/python/docs/api_overview.md +++ b/inference-engine/ie_bridges/python/docs/api_overview.md @@ -8,9 +8,10 @@ This API provides a simplified interface for Inference Engine functionality that ## Supported OSes -Inference Engine Python\* API is supported on Ubuntu\* 18.04 and 20.04, CentOS\* 7.3 OSes, Raspbian\* 9, Windows\* 10 -and macOS\* 10.x. -Supported Python* versions: +Inference Engine Python\* API is supported on Ubuntu\* 18.04 and 20.04, CentOS\* 7.3 OSes, Raspbian\* 9, Windows\* 10 +and macOS\* 10.x. + +Supported Python* versions: | Operating System | Supported Python\* versions: | |:----- | :----- | @@ -18,8 +19,8 @@ Supported Python* versions: | Ubuntu\* 20.04 | 3.6, 3.7, 3.8 | | Windows\* 10 | 3.6, 3.7, 3.8 | | CentOS\* 7.3 | 3.6, 3.7 | -| macOS\* 10.x | 3.6, 3.7 | -| Raspbian\* 9 | 3.6, 3.7 | +| macOS\* 10.x | 3.6, 3.7 | +| Raspbian\* 9 | 3.6, 3.7 | ## Set Up the Environment @@ -31,7 +32,7 @@ To configure the environment for the Inference Engine Python\* API, run: * On Raspbian\* 9,: `source /bin/setupvars.sh .` * On Windows\* 10: `call \bin\setupvars.bat` -The script automatically detects latest installed Python\* version and configures required environment if the version is supported. +The script automatically detects latest installed Python\* version and configures required environment if the version is supported. If you want to use certain version of Python\*, set the environment variable `PYTHONPATH=/python/` after running the environment configuration script. 
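To quickly verify that the environment is configured, you can import the Python API and list the available devices. The following is a minimal sketch (run it after sourcing the environment script above; the device names in the output depend on your hardware):

```python
# Minimal check that the Inference Engine Python* API is reachable on PYTHONPATH.
from openvino.inference_engine import IECore

ie = IECore()
print("Available devices:", ie.available_devices)  # for example: ['CPU', 'MYRIAD']
```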
diff --git a/inference-engine/ie_bridges/python/sample/hello_classification/README.md b/inference-engine/ie_bridges/python/sample/hello_classification/README.md index 4003a81ee16..d662a94a263 100644 --- a/inference-engine/ie_bridges/python/sample/hello_classification/README.md +++ b/inference-engine/ie_bridges/python/sample/hello_classification/README.md @@ -27,7 +27,7 @@ each sample step at [Integration Steps](../../../../../docs/IE_DG/Integrate_with ## Running -Run the application with the -h option to see the usage message: +Run the application with the `-h` option to see the usage message: ```sh python hello_classification.py -h @@ -68,7 +68,7 @@ To run the sample, you need specify a model and image: > > - The sample accepts models in ONNX format (.onnx) that do not require preprocessing. -You can do inference of an image using a pre-trained model on a GPU using the following command: +For example, to perform inference of an image using a pre-trained model on a GPU, run the following command: ```sh python hello_classification.py -m /alexnet.xml -i /cat.bmp -d GPU diff --git a/inference-engine/ie_bridges/python/sample/hello_query_device/README.md b/inference-engine/ie_bridges/python/sample/hello_query_device/README.md index 35e84bc23ed..af4784ebc38 100644 --- a/inference-engine/ie_bridges/python/sample/hello_query_device/README.md +++ b/inference-engine/ie_bridges/python/sample/hello_query_device/README.md @@ -1,6 +1,6 @@ # Hello Query Device Python* Sample {#openvino_inference_engine_ie_bridges_python_sample_hello_query_device_README} -This sample demonstrates how to show Inference Engine devices and prints their metrics and default configuration values, using [Query Device API feature](../../../../../docs/IE_DG/InferenceEngine_QueryAPI.md). +This sample demonstrates how to show Inference Engine devices and prints their metrics and default configuration values using [Query Device API feature](../../../../../docs/IE_DG/InferenceEngine_QueryAPI.md). The following Inference Engine Python API is used in the application: @@ -28,7 +28,7 @@ python hello_query_device.py ## Sample Output -For example: +The application prints all available devices with their supported metrics and default values for configuration parameters. (Some lines are not shown due to length.) For example: ```sh [ INFO ] Creating Inference Engine @@ -101,7 +101,6 @@ For example: [ INFO ] TUNING_MODE: TUNING_DISABLED [ INFO ] ``` - ## See Also - [Using Inference Engine Samples](../../../../../docs/IE_DG/Samples_Overview.md) diff --git a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md index 17a5640ccf3..b2638d78dac 100644 --- a/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md +++ b/inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md @@ -21,7 +21,7 @@ Basic Inference Engine API is covered by [Hello Classification Python* Sample](. ## How It Works -At startup, the sample application reads command-line parameters, prepares input data, loads a specified model and image to the Inference Engine plugin, performs synchronous inference, and processes output data. +On startup, the sample application reads command-line parameters, prepares input data, loads a specified model and image to the Inference Engine plugin, performs synchronous inference, and processes output data. 
As a result, the program creates an output image, logging each step in a standard output stream. You can see the explicit description of diff --git a/inference-engine/samples/speech_libs_and_demos/Offline_speech_recognition_demo.md b/inference-engine/samples/speech_libs_and_demos/Offline_speech_recognition_demo.md index 71e1d693e1f..10594e2c321 100644 --- a/inference-engine/samples/speech_libs_and_demos/Offline_speech_recognition_demo.md +++ b/inference-engine/samples/speech_libs_and_demos/Offline_speech_recognition_demo.md @@ -1,9 +1,9 @@ # Offline Speech Recognition Demo {#openvino_inference_engine_samples_speech_libs_and_demos_Offline_speech_recognition_demo} -This demo provides a command-line interface for automatic speech recognition using OpenVINO™. +This demo provides a command-line interface for automatic speech recognition using OpenVINO™. Components used by this executable: -* `lspeech_s5_ext` model - Example pretrained LibriSpeech DNN +* `lspeech_s5_ext` model - Example pre-trained LibriSpeech DNN * `speech_library.dll` (`.so`) - Open source speech recognition library that uses OpenVINO™ Inference Engine, Intel® Speech Feature Extraction and Intel® Speech Decoder libraries ## How It Works @@ -87,4 +87,4 @@ The resulting transcription for the sample audio file: [ INFO ] Model loading time: 61.01 ms Recognition result: HOW ARE YOU DOING -``` \ No newline at end of file +``` diff --git a/inference-engine/samples/speech_libs_and_demos/Speech_libs_and_demos.md b/inference-engine/samples/speech_libs_and_demos/Speech_libs_and_demos.md index 212ffb26f19..5bd8b99d82a 100644 --- a/inference-engine/samples/speech_libs_and_demos/Speech_libs_and_demos.md +++ b/inference-engine/samples/speech_libs_and_demos/Speech_libs_and_demos.md @@ -34,9 +34,9 @@ The package contains the following components: Additionally, new acoustic and language models are available in the OpenVINO™ [storage](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/). -## Run Speech Recognition Demos with Pretrained Models +## Run Speech Recognition Demos with Pre-trained Models -To download pretrained models and build all dependencies: +To download pre-trained models and build all dependencies: * On Linux* OS, use the shell script `/deployment_tools/demo/demo_speech_recognition.sh` @@ -67,9 +67,9 @@ set https_proxy=https://{proxyHost}:{proxyPort} ## Hardware Support -The provided acoustic models have been tested on a CPU, graphics processing unit (GPU), and Intel® Gaussian & Neural Accelerator (Intel® GNA), and you can switch between these targets in offline and live speech recognition demos. +The provided acoustic models have been tested on a CPU, graphics processing unit (GPU), and Intel® Gaussian & Neural Accelerator (Intel® GNA), and you can switch between these targets in offline and live speech recognition demos. -> **NOTE**: Intel® GNA is a specific low-power coprocessor, which offloads some workloads, thus saving power and CPU resources. If you use a processor supporting the GNA, such as Intel® Core™ i3-8121U and Intel® Core™ i7-1065G7, you can notice that CPU load is much lower when GNA is selected. If you selected GNA as a device for inference, and your processor does not support GNA, then execution is performed in the emulation mode (on CPU) because `GNA_AUTO` configuration option is used. +> **NOTE**: Intel® GNA is a specific low-power coprocessor, which offloads some workloads, thus saving power and CPU resources. 
If you use a processor supporting the GNA, such as Intel® Core™ i3-8121U and Intel® Core™ i7-1065G7, you can notice that CPU load is much lower when GNA is selected. If you selected GNA as a device for inference, and your processor does not support GNA, then execution is performed in the emulation mode (on CPU) because `GNA_AUTO` configuration option is used. > See [the GNA plugin documentation](https://docs.openvinotoolkit.org/latest/_docs_IE_DG_supported_plugins_GNA.html) for more information. Speech Library provides a highly optimized implementation of preprocessing and postprocessing (feature extraction and decoding) on CPU only. @@ -78,7 +78,7 @@ Speech Library provides a highly optimized implementation of preprocessing and p Before running demonstration applications with custom models, follow the steps below: -1. Build the Speech Library and demonstration application using the `demo_speech_recognition.sh/.bat` file mentioned in Run Speech Recognition Demos with Pretrained Models +1. Build the Speech Library and demonstration application using the `demo_speech_recognition.sh/.bat` file mentioned in Run Speech Recognition Demos with Pre-trained Models 2. Train acoustic and statistical language models using the Kaldi framework (if required) 3. [Convert the acoustic model](../../../docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md) using Model Optimizer for Kaldi 4. [Convert the language model](Kaldi_SLM_conversion_tool.md) using the Kaldi toolkit and provided converter diff --git a/inference-engine/samples/speech_sample/README.md b/inference-engine/samples/speech_sample/README.md index 5d10f81c6e5..91365bd6c60 100644 --- a/inference-engine/samples/speech_sample/README.md +++ b/inference-engine/samples/speech_sample/README.md @@ -24,7 +24,7 @@ Basic Inference Engine API is covered by [Hello Classification C++ sample](../he |:--- |:--- | Validated Models | Acoustic model based on Kaldi\* neural networks (see [Model Preparation](#model-preparation) section) | Model Format | Inference Engine Intermediate Representation (\*.xml + \*.bin), ONNX (\*.onnx) -| Supported devices | See [Execution Modes section](#execution-modes) below and [List Supported Devices](../../../docs/IE_DG/supported_plugins/Supported_Devices.md) | +| Supported devices | See [Execution Modes](#execution-modes) section below and [List Supported Devices](../../../docs/IE_DG/supported_plugins/Supported_Devices.md) | ## How It Works @@ -61,14 +61,14 @@ will be removed in GNA hardware version 3 and higher. Several execution modes are supported via the `-d` flag: -- `CPU` - all calculation will be performed on CPU device using CPU Plugin. -- `GPU` - all calculation will be performed on GPU device using GPU Plugin. -- `MYRIAD` - all calculation will be performed on Intel® Neural Compute Stick 2 device using VPU MYRIAD Plugin. -- `GNA_AUTO` - the GNA hardware is used if available and the driver is installed. Otherwise, the GNA device is emulated in fast-but-not-bit-exact mode. -- `GNA_HW` - the GNA hardware is used if available and the driver is installed. Otherwise, an error will occur. -- `GNA_SW` - deprecated. The GNA device is emulated in fast-but-not-bit-exact mode. -- `GNA_SW_FP32` - substitutes parameters and calculations from low precision to floating point (FP32). -- `GNA_SW_EXACT` - the GNA device is emulated in bit-exact mode. +- `CPU` - All calculation are performed on CPU device using CPU Plugin. +- `GPU` - All calculation are performed on GPU device using GPU Plugin. 
+- `MYRIAD` - All calculation are performed on Intel® Neural Compute Stick 2 device using VPU MYRIAD Plugin. +- `GNA_AUTO` - GNA hardware is used if available and the driver is installed. Otherwise, the GNA device is emulated in fast-but-not-bit-exact mode. +- `GNA_HW` - GNA hardware is used if available and the driver is installed. Otherwise, an error will occur. +- `GNA_SW` - Deprecated. The GNA device is emulated in fast-but-not-bit-exact mode. +- `GNA_SW_FP32` - Substitutes parameters and calculations from low precision to floating point (FP32). +- `GNA_SW_EXACT` - GNA device is emulated in bit-exact mode. #### Loading and Saving Models @@ -137,7 +137,7 @@ Running the application with the empty list of options yields the usage message You can use the following model optimizer command to convert a Kaldi nnet1 or nnet2 neural network to Inference Engine Intermediate Representation format: ```sh -python mo.py --framework kaldi --input_model wsj_dnn5b.nnet --counts wsj_dnn5b.counts --remove_output_softmax +python mo.py --framework kaldi --input_model wsj_dnn5b.nnet --counts wsj_dnn5b.counts --remove_output_softmax --output_dir ``` Assuming that the model optimizer (`mo.py`), Kaldi-trained neural network, `wsj_dnn5b.nnet`, and Kaldi class counts file, `wsj_dnn5b.counts`, are in the working directory this produces @@ -153,14 +153,16 @@ All of them can be downloaded from [https://storage.openvinotoolkit.org/models_c ### Speech Inference -Once the IR is created, you can use the following command to do inference on Intel^® Processors with the GNA co-processor (or emulation library): +Once the IR is created, you can use the following command to do inference on Intel® Processors with the GNA co-processor (or emulation library): ```sh ./speech_sample -d GNA_AUTO -bs 2 -i dev93_10.ark -m wsj_dnn5b.xml -o scores.ark -r dev93_scores_10.ark ``` Here, the floating point Kaldi-generated reference neural network scores (`dev93_scores_10.ark`) corresponding to the input feature file (`dev93_10.ark`) are assumed to be available -for comparison. All of them can be downloaded from [https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/wsj_dnn5b_smbr](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/wsj_dnn5b_smbr). Inference Engine Intermediate Representation `wsj_dnn5b.xml` file was generated in the [previous Model preparation section](#model-preparation). +for comparison. + +All of them can be downloaded from [https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/wsj_dnn5b_smbr](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/wsj_dnn5b_smbr). Inference Engine Intermediate Representation `wsj_dnn5b.xml` file was generated in the previous [Model Preparation](#model-preparation) section. > **NOTES**: > @@ -230,8 +232,7 @@ nnet-forward --use-gpu=no final.feature_transform "ark,s,cs:copy-feats scp:feats ```sh ./speech_sample -d GNA_AUTO -bs 8 -i feat.ark -m wsj_dnn5b.xml -o scores.ark ``` - -Inference Engine Intermediate Representation `wsj_dnn5b.xml` file was generated in the [previous Model preparation section](#model-preparation). +Inference Engine Intermediate Representation `wsj_dnn5b.xml` file was generated in the previous [Model Preparation](#model-preparation) section. 3. 
Run the Kaldi decoder to produce n-best text hypotheses and select most likely text given the WFST (`HCLG.fst`), vocabulary (`words.txt`), and TID/PID mapping (`final.mdl`): diff --git a/inference-engine/thirdparty/fluid/modules/gapi/doc/10-hld-overview.md b/inference-engine/thirdparty/fluid/modules/gapi/doc/10-hld-overview.md index 557bf08b12e..6de6efa9216 100644 --- a/inference-engine/thirdparty/fluid/modules/gapi/doc/10-hld-overview.md +++ b/inference-engine/thirdparty/fluid/modules/gapi/doc/10-hld-overview.md @@ -142,7 +142,7 @@ Graph execution is triggered in two ways: Both methods are polimorphic and take a variadic number of arguments, with validity checks performed in runtime. If a number, shapes, and -formats of passed data objects differ from expected, a run-time +formats of passed data objects differ from expected, a runtime exception is thrown. G-API also provides _typed_ wrappers to move these checks to the compile time -- see `cv::GComputationT<>`. diff --git a/inference-engine/tools/compile_tool/README.md b/inference-engine/tools/compile_tool/README.md index 14b72bb6299..0b083e15dc1 100644 --- a/inference-engine/tools/compile_tool/README.md +++ b/inference-engine/tools/compile_tool/README.md @@ -1,13 +1,13 @@ # Compile Tool {#openvino_inference_engine_tools_compile_tool_README} -Compile tool is a C++ application that enables you to compile a network for inference on a specific device and export it to a binary file. +Compile tool is a C++ application that enables you to compile a network for inference on a specific device and export it to a binary file. With the Compile Tool, you can compile a network using supported Inference Engine plugins on a machine that doesn't have the physical device connected and then transfer a generated file to any machine with the target inference device available. The tool compiles networks for the following target devices using corresponding Inference Engine plugins: * Intel® Neural Compute Stick 2 (MYRIAD plugin) -> **NOTE**: Intel® Distribution of OpenVINO™ toolkit no longer supports the Intel® Vision Accelerator Design with an Intel® Arria® 10 FPGA and the Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA. To compile a network for those devices, use the Compile Tool from the Intel® Distribution of OpenVINO™ toolkit [2020.3 LTS release](https://docs.openvinotoolkit.org/2020.3/_inference_engine_tools_compile_tool_README.html). +> **NOTE**: Intel® Distribution of OpenVINO™ toolkit no longer supports the Intel® Vision Accelerator Design with an Intel® Arria® 10 FPGA and the Intel® Programmable Acceleration Card with Intel® Arria® 10 GX FPGA. To compile a network for those devices, use the Compile Tool from the Intel® Distribution of OpenVINO™ toolkit [2020.3 LTS release](https://docs.openvinotoolkit.org/2020.3/_inference_engine_tools_compile_tool_README.html). The tool is delivered as an executable file that can be run on both Linux* and Windows*. @@ -15,7 +15,7 @@ The tool is located in the `/deployment_tools/tools/compile_tool` d The workflow of the Compile tool is as follows: -1. Upon the start, the tool application reads command-line parameters and loads a network to the Inference Engine device. +1. First, the application reads command-line parameters and loads a network to the Inference Engine device. 2. The application exports a blob with the compiled network and writes it to the output file. 
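For context, the exported blob can later be consumed on a target machine without the original IR. The sketch below is illustrative only: the blob file name is a placeholder, and it assumes the blob was compiled for MYRIAD and that your release exposes `IECore.import_network` in the Python API:

```python
# Illustrative sketch: loading a pre-compiled blob produced by the Compile Tool.
# "model.blob" is a placeholder name; adjust it to your exported file.
from openvino.inference_engine import IECore

ie = IECore()
exec_net = ie.import_network(model_file="model.blob", device_name="MYRIAD")
print("Inputs of the imported network:", list(exec_net.input_info))
```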
## Run the Compile Tool From ec7b1f441634599cc79ee5603d2bacf7256594ba Mon Sep 17 00:00:00 2001 From: Vladimir Zinoviev Date: Thu, 6 May 2021 15:54:56 +0300 Subject: [PATCH 64/73] [LPT] Improve Etlwise branch selection logic (#5208) --- .../src/eltwise_base_transformation.cpp | 90 ++++++++++++------- .../lp_transformations/add_transformation.cpp | 24 +++++ .../lpt_ngraph_functions/src/add_function.cpp | 80 ++++++++++++++--- 3 files changed, 151 insertions(+), 43 deletions(-) diff --git a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp index 53fa6b3d618..a119dcca64f 100644 --- a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp @@ -26,28 +26,6 @@ bool EltwiseBaseTransformation::isBroadcasted(const Shape& shape) noexcept { return true; } -bool isBranchWithTargetType(const std::shared_ptr& fakeQuantize) { - if (fakeQuantize == nullptr) { - return false; - } - - const std::shared_ptr parent = fakeQuantize->get_input_node_shared_ptr(0); - - if ((parent->get_output_size() != 1ul) || (parent->get_output_target_inputs(0).size() != 1ul)) { - return false; - } - - bool isTargetType = - is_type(parent) || - (is_type(parent) && is_type(parent->get_input_node_shared_ptr(0))) || - is_type(parent) || - (is_type(parent) && is_type(parent->get_input_node_shared_ptr(0))) || - is_type(parent) || - (is_type(parent) && is_type(parent->get_input_node_shared_ptr(0))); - - return isTargetType; -} - bool EltwiseBaseTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr operation) const { if (!LayerTransformation::canBeTransformed(context, operation)) { return false; @@ -85,6 +63,36 @@ bool EltwiseBaseTransformation::canBeTransformed(const TransformationContext& co return true; } +static bool isTargetType(const std::shared_ptr node) { + return is_type(node) || + is_type(node) || + is_type(node); +} + +static std::shared_ptr getDataParent(const std::shared_ptr branchData) { + std::shared_ptr parent = branchData; + while (is_type(parent)) { + parent = parent->get_input_node_shared_ptr(0); + } + + if (is_type(parent) && isTargetType(parent->get_input_node_shared_ptr(0))) { + return parent->get_input_node_shared_ptr(0); + } + return parent; +} + +static bool isBranchHaveMultipleConsumers(const std::shared_ptr branchData, const std::shared_ptr branchDataParent) { + auto parent = branchData; + while (parent != branchDataParent) { + if ((parent->get_output_size() != 1ul) || (parent->get_output_target_inputs(0).size() != 1ul)) { + return true; + } + parent = parent->get_input_node_shared_ptr(0); + } + return (parent->get_output_size() != 1ul) || (parent->get_output_target_inputs(0).size() != 1ul); +} + +// return branch index with FP32 precision after eltwise transformation int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr& eltwise) const { const FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(eltwise, 0ul); if (dequantization1.empty() || as_type(dequantization1.data.get_node())) { @@ -126,21 +134,37 @@ int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr& eltwise) return 1; } - const bool allBranchesAreEqual = isBranchWithTargetType(fakeQuantize1) == isBranchWithTargetType(fakeQuantize2); - const std::vector> dataNodes = { - 
dequantization1.data.get_node_shared_ptr(), - dequantization2.data.get_node_shared_ptr() }; - for (size_t i = 0; i < dataNodes.size(); ++i) { - const std::shared_ptr& data = dataNodes[i]; - if ((allBranchesAreEqual && isBroadcasted(data->get_output_shape(0))) || - (!allBranchesAreEqual && isBranchWithTargetType(as_type_ptr(data)))) { - return static_cast(i); + const std::vector> parentNodes = { + getDataParent(dequantization1.data.get_node_shared_ptr()), + getDataParent(dequantization2.data.get_node_shared_ptr()) }; + + const bool allBranchesAreEqual = isTargetType(parentNodes[0]) == isTargetType(parentNodes[1]); + if (allBranchesAreEqual) { + for (size_t i = 0; i < parentNodes.size(); ++i) { + if (isBroadcasted(parentNodes[i]->get_output_shape(0))) { + return static_cast(i); + } } } - int fullPathIndex = 0; + const bool multipleConsumers0 = isBranchHaveMultipleConsumers(dequantization1.data.get_node_shared_ptr(), parentNodes[0]); + const bool multipleConsumers1 = isBranchHaveMultipleConsumers(dequantization2.data.get_node_shared_ptr(), parentNodes[1]); + if (multipleConsumers0 && !multipleConsumers1) { + return 1; + } + if (!multipleConsumers0 && multipleConsumers1) { + return 0; + } - return fullPathIndex; + if (!allBranchesAreEqual) { + for (size_t i = 0; i < parentNodes.size(); ++i) { + if (isTargetType(parentNodes[i])) { + return static_cast(i); + } + } + } + + return 0; } std::pair EltwiseBaseTransformation::getMultiplyConstBranch(const std::shared_ptr& eltwise) const { diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/add_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/add_transformation.cpp index ca5175caf8c..8098b89e73c 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/add_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/add_transformation.cpp @@ -637,6 +637,30 @@ const std::vector addTransformationTestValues = { }, "convolution" }, + // convolution with multiple consumers before FQ ( FP32 on other branch due to possible quantize fusing ) + { + ngraph::element::f32, + ngraph::Shape{1, 4, 16, 16}, + false, + -1, + LayerTransformation::createParamsU8I8(), + { + ngraph::element::u8, + { {ngraph::element::f32}, { 7.f }, { 10.f }}, + ngraph::element::u8, + { {ngraph::element::f32}, { 3.f }, { 5.f } }, + {} + }, + { + ngraph::element::u8, + { {ngraph::element::f32}, { 8.5f }, { 2.f } }, + ngraph::element::u8, + { {}, {}, {}}, + { {}, {}, { 5.f } }, + {} + }, + "convolution_multiconsumers" + }, // group convolution before FQ (choose that branch) { ngraph::element::f32, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/add_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/add_function.cpp index f8afbe434b0..6a15342ee41 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/add_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/add_function.cpp @@ -30,15 +30,24 @@ std::shared_ptr AddFunction::getOriginal( const std::vector& constValues, const std::string& additionalLayer) { std::shared_ptr input1; + std::shared_ptr parent1; if (constInput == 0) { - input1 = std::make_shared( + parent1 = std::make_shared( precision, inputShape, constValues); } else { input1 = std::make_shared( - precision1.is_real() ? precision : precision1, - broadcast ? 
ngraph::Shape({ inputShape[0], inputShape[1], 1, 1 }) : ngraph::Shape(inputShape)); + additionalLayer != "" ? precision : (precision1.is_real() ? precision : precision1), + broadcast ? ngraph::Shape({inputShape[0], inputShape[1], 1, 1}) : ngraph::Shape(inputShape)); + if (additionalLayer != "") { + parent1 = ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed( + input1, + precision, + {256, Shape{}, {0}, {255}, {0}, {255}, precision1}); + } else { + parent1 = input1; + } } auto dequantizationStructure1 = dequantization1; @@ -47,7 +56,7 @@ std::shared_ptr AddFunction::getOriginal( dequantizationStructure1.subtract.outPrecision = precision; } - const auto dequantizationOp1 = dequantization1.empty() ? input1 : makeDequantization(input1, dequantizationStructure1); + const auto dequantizationOp1 = dequantization1.empty() ? parent1 : makeDequantization(parent1, dequantizationStructure1); std::shared_ptr input2; if (constInput == 1) { @@ -73,6 +82,21 @@ std::shared_ptr AddFunction::getOriginal( ngraph::CoordinateDiff{ 0, 0 }, ngraph::Strides{ 1, 1 }); } + std::shared_ptr additional_output = nullptr; + if (additionalLayer == "convolution_multiconsumers") { + parent = std::make_shared>( + std::vector{ element::f32, element::f32 }, + std::vector{ precision }, + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType( + std::make_shared(element::i8, Shape{ 1, 4, 1, 1 }, std::vector{0.8f, 0.8f, 0.8f, 0.8f}), + element::f32).get(), + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }); + additional_output = parent; + } if (additionalLayer == "group_convolution") { parent = std::make_shared< ngraph::op::TypeRelaxed>( std::vector{ element::f32, element::f32 }, @@ -105,7 +129,13 @@ std::shared_ptr AddFunction::getOriginal( auto& rtInfo = add->get_rt_info(); rtInfo["Variant::std::string"] = std::make_shared>("add"); - ngraph::ResultVector results{ std::make_shared(add) }; + std::shared_ptr output = add; + if (additional_output != nullptr) { + output = std::make_shared(add, additional_output); + output->set_friendly_name("output_multiply"); + } + + ngraph::ResultVector results {std::make_shared(output)}; ngraph::ParameterVector parameters; if (constInput == -1) { parameters = { as_type_ptr(input1), as_type_ptr(input2) }; @@ -172,20 +202,29 @@ std::shared_ptr AddFunction::getReference( const std::string& additionalLayer, const std::string& operationType) { std::shared_ptr input1; + std::shared_ptr parent1; if (constInputIndex == 0) { - input1 = std::make_shared( + parent1 = std::make_shared( dequantizationAfter.empty() ? precision : element::f32, inputShape, constValues); } else { input1 = std::make_shared( - precision1.is_real() ? precision : precision1, - broadcast ? ngraph::Shape({ inputShape[0], inputShape[1], 1, 1 }) : ngraph::Shape(inputShape)); + additionalLayer != "" ? precision : (precision1.is_real() ? precision : precision1), + broadcast ? ngraph::Shape({inputShape[0], inputShape[1], 1, 1}) : ngraph::Shape(inputShape)); + if (additionalLayer != "") { + parent1 = ngraph::builder::subgraph::makeFakeQuantizeTypeRelaxed( + input1, + precision, + {256, Shape{}, {0}, {255}, {0}, {255}, precision1}); + } else { + parent1 = input1; + } } auto dequantizationStructure1 = dequantization1; dequantizationStructure1.multiply.outPrecision = dequantizationAfter.empty() ? precision : element::f32; - const auto dequantizationOp1 = is_type(input1) ? 
input1 : makeDequantization(input1, dequantizationStructure1); + const auto dequantizationOp1 = is_type(parent1) ? parent1 : makeDequantization(parent1, dequantizationStructure1); std::shared_ptr input2; if (constInputIndex == 1) { @@ -211,6 +250,21 @@ std::shared_ptr AddFunction::getReference( ngraph::CoordinateDiff{ 0, 0 }, ngraph::Strides{ 1, 1 }); } + std::shared_ptr additional_output = nullptr; + if (additionalLayer == "convolution_multiconsumers") { + parent = std::make_shared>( + std::vector{ element::f32, element::f32 }, + std::vector{ precision }, + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType( + std::make_shared(element::i8, Shape{ 1, 4, 1, 1 }, std::vector{0.8f, 0.8f, 0.8f, 0.8f}), + element::f32).get(), + ngraph::Strides{ 1, 1 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::CoordinateDiff{ 0, 0 }, + ngraph::Strides{ 1, 1 }); + additional_output = parent; + } if (additionalLayer == "group_convolution") { parent = std::make_shared< ngraph::op::TypeRelaxed>( std::vector{ element::f32, element::f32 }, @@ -259,8 +313,14 @@ std::shared_ptr AddFunction::getReference( const auto dequantizationOpAfter = makeDequantization(add, dequantizationStructureAfter); dequantizationOpAfter->set_friendly_name("output"); + std::shared_ptr output = dequantizationOpAfter; + if (additional_output != nullptr) { + output = std::make_shared(dequantizationOpAfter, additional_output); + output->set_friendly_name("output_multiply"); + } + + ngraph::ResultVector results {std::make_shared(output)}; - ngraph::ResultVector results{ std::make_shared(dequantizationOpAfter) }; ngraph::ParameterVector parameters; if (constInputIndex == -1) { parameters = { as_type_ptr(input1), as_type_ptr(input2) }; From e86b9b17b002db6ad8bede503cf08ddd7b23f136 Mon Sep 17 00:00:00 2001 From: Anton Chetverikov Date: Thu, 6 May 2021 16:01:53 +0300 Subject: [PATCH 65/73] Update Gather-7 specification (#5441) * Allow nagative values for batch_dims * Update formula * Update spec according to comments * clarified cases when batch_dims and axis less than zero and enhanced restriction for index types Co-authored-by: Pavel Esir --- docs/ops/movement/Gather_7.md | 41 ++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/docs/ops/movement/Gather_7.md b/docs/ops/movement/Gather_7.md index b6370807fd3..d0ca451fee2 100644 --- a/docs/ops/movement/Gather_7.md +++ b/docs/ops/movement/Gather_7.md @@ -10,18 +10,20 @@ TensorFlow\* [Gather](https://www.tensorflow.org/api_docs/python/tf/gather) oper **Detailed description** - output[p_0, p_1, ..., p_{axis-1}, p_axis, ..., p_{axis + k}, ...] = - data[p_0, p_1, ..., p_{axis-1}, indices[p_0, p_1, ..., p_{b-1}, p_b, ..., p_{axis}, j], ...] + output[p_0, p_1, ..., p_{axis-1}, i_b, ..., i_{M-1}, p_{axis+1}, ..., p_{N-1}] = + data[p_0, p_1, ..., p_{axis-1}, indices[p_0, p_1, ..., p_{b-1}, i_b, ..., i_{M-1}], p_{axis+1}, ..., p_{N-1}] -Where `data`, `indices` and `axis` are tensors from first, second and third inputs correspondingly, and `b` is -the number of batch dimensions. +Where `data`, `indices` and `axis` are tensors from first, second and third inputs correspondingly, `b` is +the number of batch dimensions. `N` and `M` are numbers of dimensions of `data` and `indices` tensors, respectively. 
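To make the indexing formula concrete, below is a minimal NumPy sketch of the same semantics. It is illustrative only: NumPy and the helper name `gather_with_batch_dims` are assumptions of this example, not part of the specification or of any plugin implementation.

```python
import numpy as np

def gather_with_batch_dims(data, indices, axis, batch_dims=0):
    # Normalize negative axis and batch_dims, as the attribute descriptions below require.
    axis = axis if axis >= 0 else data.ndim + axis
    batch_dims = batch_dims if batch_dims >= 0 else indices.ndim + batch_dims
    assert data.shape[:batch_dims] == indices.shape[:batch_dims]
    assert batch_dims <= axis

    # Run an ordinary gather (np.take) independently inside every batch.
    batch_shape = data.shape[:batch_dims]
    per_batch = [np.take(data[b], indices[b], axis=axis - batch_dims)
                 for b in np.ndindex(*batch_shape)]
    out_shape = data.shape[:axis] + indices.shape[batch_dims:] + data.shape[axis + 1:]
    return np.stack(per_batch).reshape(out_shape)
```

The sketch reproduces the numerical examples given below, including the negative *batch_dims* case.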
**Attributes**: * *batch_dims* * **Description**: *batch_dims* (also denoted as `b`) is a leading number of dimensions of `data` tensor and `indices` representing the batches, and *Gather* starts to gather from the `b` dimension. It requires the first `b` - dimensions in `data` and `indices` tensors to be equal. - * **Range of values**: `[0; min(data.rank, indices.rank))` and `batch_dims <= axis` + dimensions in `data` and `indices` tensors to be equal. If `batch_dims` is less than zero, normalized value is used + `batch_dims = indices.rank + batch_dims`. + * **Range of values**: `[-min(data.rank, indices.rank); min(data.rank, indices.rank))` and `batch_dims' <= axis'`. + Where `batch_dims'` and `axis'` stand for normalized `batch_dims` and `axis` values. * **Type**: *T_AXIS* * **Default value**: 0 * **Required**: *no* @@ -112,6 +114,24 @@ output = [[[[ 5, 6, 7, 8], output_shape = (2, 1, 3, 4) ``` +Example 5 with negative *batch_dims* value: +``` +batch_dims = -1 <-- normalized value will be indices.rank + batch_dims = 2 - 1 = 1 +axis = 1 + +indices = [[0, 0, 4], <-- this is applied to the first batch + [4, 0, 0]] <-- this is applied to the second batch +indices_shape = (2, 3) + +data = [[1, 2, 3, 4, 5], <-- the first batch + [6, 7, 8, 9, 10]] <-- the second batch +data_shape = (2, 5) + +output = [[ 1, 1, 5], + [10, 6, 6]] +output_shape = (2, 3) +``` + **Inputs** * **1**: `data` tensor of type *T* with arbitrary data. **Required**. @@ -120,8 +140,9 @@ output_shape = (2, 1, 3, 4) **Required**. * **3**: Scalar or 1D tensor `axis` of *T_AXIS* type is a dimension index to gather data from. For example, -*axis* equal to 1 means that gathering is performed over the first dimension. Negative value means reverse indexing. -Allowed values are from `[-len(data.shape), len(indices.shape) - 1]` and `axis >= batch_dims`. +*axis* equal to 1 means that gathering is performed over the first dimension. Negative `axis` means reverse indexing and + will be normalized to value `axis = data.rank + axis`. Allowed values are from `[-len(data.shape), len(data.shape) - 1]` + and `axis' >= batch_dims'`. Where `axis'` and `batch_dims'` stand for normalized `batch_dims` and `axis` values. **Required**. **Outputs** @@ -133,9 +154,9 @@ of the output tensor is `data.shape[:axis] + indices.shape[batch_dims:] + data.s * *T*: any supported type. -* *T_IND*: `int32` or `int64`. +* *T_IND*: any supported integer types. -* *T_AXIS*: `int32` or `int64`. +* *T_AXIS*: any supported integer types. 
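The negative *batch_dims* normalization shown in Example 5 above can be checked with a few lines of NumPy (again purely illustrative, not part of the specification):

```python
import numpy as np

data = np.array([[1, 2, 3, 4, 5],
                 [6, 7, 8, 9, 10]])      # data_shape = (2, 5)
indices = np.array([[0, 0, 4],
                    [4, 0, 0]])          # indices_shape = (2, 3)

batch_dims = -1 + indices.ndim           # normalized: 2 - 1 = 1
axis = 1

# With normalized batch_dims == 1 this is one np.take per batch row.
out = np.stack([np.take(data[b], indices[b], axis=axis - batch_dims)
                for b in range(data.shape[0])])
print(out)                               # [[ 1  1  5]
                                         #  [10  6  6]], output_shape = (2, 3)
```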
**Example** From b3c1dd9de31e13bbef4ce757821d3846d9c1a95b Mon Sep 17 00:00:00 2001 From: Anton Chetverikov Date: Thu, 6 May 2021 16:20:37 +0300 Subject: [PATCH 66/73] Add sys_platform environment marker to version checker (#5437) * Add sys_platform environment marker * Update sys_platform check * Add unit tests for sys_platform marker * apply review comments * Fix typo * Update checker and tests, apply comments * Update comments parsing and tests * Fix commrnt * Resolve comments and update check logic * Update tests and fix bug with negative tests Co-authored-by: achetver --- model-optimizer/mo/utils/versions_checker.py | 60 +++++++++++----- .../mo/utils/versions_checker_test.py | 70 +++++++++++++++++-- 2 files changed, 106 insertions(+), 24 deletions(-) diff --git a/model-optimizer/mo/utils/versions_checker.py b/model-optimizer/mo/utils/versions_checker.py index 558930b4a75..17ee21f14d1 100644 --- a/model-optimizer/mo/utils/versions_checker.py +++ b/model-optimizer/mo/utils/versions_checker.py @@ -56,22 +56,26 @@ def parse_and_filter_versions_list(required_fw_versions, version_list, env_setup line = line.strip(' ') if line == '': return version_list - splited_requirement = line.split(";") + split_requirement = line.split(";") # check environment marker - if len(splited_requirement) > 1: - env_req = splited_requirement[1] - splited_env_req = re.split(r"==|>=|<=|>|<|~=", env_req) - splited_env_req = [l.strip(',') for l in splited_env_req] - env_marker = splited_env_req[0].strip(' ') + if len(split_requirement) > 1: + env_req = split_requirement[1] + if any([x in split_requirement[1] for x in [' and ', ' or ']]): + log.error("The version checker doesn't support environment marker combination and it will be ignored: {}" + "".format(split_requirement[1]), extra={'is_warning': True}) + return version_list + split_env_req = re.split(r"==|>=|<=|>|<|~=|!=", env_req) + split_env_req = [l.strip(',') for l in split_env_req] + env_marker = split_env_req[0].strip(' ') if env_marker == 'python_version' and env_marker in env_setup: installed_python_version = env_setup['python_version'] env_req_version_list = [] - splited_required_versions = re.split(r",", env_req) - for i, l in enumerate(splited_required_versions): + split_required_versions = re.split(r",", env_req) + for i, l in enumerate(split_required_versions): for comparison in ['==', '>=', '<=', '<', '>', '~=']: if comparison in l: - required_version = splited_env_req[i + 1].strip(' ').replace('"', '') + required_version = split_env_req[i + 1].strip(' ').replace("'", "").replace('"', '') env_req_version_list.append((env_marker, comparison, required_version)) break not_satisfied_list = [] @@ -82,25 +86,39 @@ def parse_and_filter_versions_list(required_fw_versions, version_list, env_setup # this python_version requirement is not satisfied to required environment # and requirement for a dependency will be skipped return version_list + elif env_marker == 'sys_platform' and env_marker in env_setup: + split_env_req[1] = split_env_req[1].strip(' ').replace("'", "").replace('"', '') + if '==' in env_req: + if env_setup['sys_platform'] != split_env_req[1]: + # this sys_platform requirement is not satisfied to required environment + # and requirement for a dependency will be skipped + return version_list + elif '!=' in env_req: + if env_setup['sys_platform'] == split_env_req[1]: + # this sys_platform requirement is not satisfied to required environment + # and requirement for a dependency will be skipped + return version_list + else: + log.error("Error 
during platform version check, line: {}".format(line)) else: log.error("{} is unsupported environment marker and it will be ignored".format(env_marker), extra={'is_warning': True}) # parse a requirement for a dependency - requirement = splited_requirement[0] - splited_versions_by_conditions = re.split(r"==|>=|<=|>|<|~=", requirement) - splited_versions_by_conditions = [l.strip(',') for l in splited_versions_by_conditions] + requirement = split_requirement[0] + split_versions_by_conditions = re.split(r"==|>=|<=|>|<|~=", requirement) + split_versions_by_conditions = [l.strip(',').strip(' ') for l in split_versions_by_conditions] - if len(splited_versions_by_conditions) == 0: + if len(split_versions_by_conditions) == 0: return version_list - if len(splited_versions_by_conditions) == 1: - version_list.append((splited_versions_by_conditions[0], None, None)) + if len(split_versions_by_conditions) == 1: + version_list.append((split_versions_by_conditions[0], None, None)) else: - splited_required_versions= re.split(r",", requirement) - for i, l in enumerate(splited_required_versions): + split_required_versions= re.split(r",", requirement) + for i, l in enumerate(split_required_versions): for comparison in ['==', '>=', '<=', '<', '>', '~=']: if comparison in l: - version_list.append((splited_versions_by_conditions[0], comparison, splited_versions_by_conditions[i + 1])) + version_list.append((split_versions_by_conditions[0], comparison, split_versions_by_conditions[i + 1])) break return version_list @@ -124,9 +142,12 @@ def get_module_version_list_from_file(file_name, env_setup): Returned object is: [('tensorflow', '>=', '1.2.0'), ('networkx', '==', '2.1'), ('numpy', None, None)] """ - req_dict = list() + req_dict = [] with open(file_name) as f: for line in f: + # handle comments + line = line.split('#')[0] + req_dict = parse_and_filter_versions_list(line, req_dict, env_setup) return req_dict @@ -191,6 +212,7 @@ def get_environment_setup(): exec("del tensorflow") except (AttributeError, ImportError): pass + env_setup['sys_platform'] = sys.platform return env_setup diff --git a/model-optimizer/unit_tests/mo/utils/versions_checker_test.py b/model-optimizer/unit_tests/mo/utils/versions_checker_test.py index 2942d4c1194..40c3d6ec277 100644 --- a/model-optimizer/unit_tests/mo/utils/versions_checker_test.py +++ b/model-optimizer/unit_tests/mo/utils/versions_checker_test.py @@ -52,8 +52,9 @@ class TestingVersionsChecker(unittest.TestCase): def test_get_module_version_list_from_file3(self, mock_open): mock_open.return_value.__enter__ = mock_open mock_open.return_value.__iter__ = mock.Mock( - return_value=iter(['tensorflow>=1.15.2,<2.0; python_version < "3.8"', - 'tensorflow>=2.0; python_version >= "3.8"', + return_value=iter(['# Commented line', + 'tensorflow>=1.15.2,<2.0; python_version < "3.8"', + 'tensorflow>=2.0; python_version >= "3.8" # Comment after line', 'numpy==1.12.0', 'defusedxml<=0.5.0', 'networkx~=1.11'])) @@ -79,12 +80,71 @@ class TestingVersionsChecker(unittest.TestCase): def test_append_version_list(self): v1 = 'mxnet>=1.0.0,<=1.3.1' - req_list = list() + req_list = [] parse_and_filter_versions_list(v1, req_list, {}) ref_list = [('mxnet', '>=', '1.0.0'), ('mxnet', '<=', '1.3.1')] - for i, v in enumerate(req_list): - self.assertEqual(v, ref_list[i]) + for i, v in enumerate(ref_list): + self.assertEqual(v, req_list[i]) + + def test_append_version_list_sys_neg_1(self): + v1 = "mxnet>=1.7.0 ; sys_platform != 'win32'" + req_list = [] + parse_and_filter_versions_list(v1, req_list, 
{'sys_platform': 'darwin'}) + ref_list = [('mxnet', '>=', '1.7.0')] + for i, v in enumerate(ref_list): + self.assertEqual(v, req_list[i]) + + def test_append_version_list_sys_neg_2(self): + v1 = "mxnet>=1.7.0 ; sys_platform != 'win32'" + req_list = [] + parse_and_filter_versions_list(v1, req_list, {'sys_platform': 'win32'}) + ref_list = [] + for i, v in enumerate(ref_list): + self.assertEqual(v, req_list[i]) + + def test_append_version_list_sys(self): + v1 = "mxnet>=1.7.0 ; sys_platform == 'linux'" + req_list = [] + + parse_and_filter_versions_list(v1, req_list, {'sys_platform': 'linux'}) + ref_list = [('mxnet', '>=', '1.7.0')] + for i, v in enumerate(ref_list): + self.assertEqual(v, req_list[i]) + + def test_append_version_list_sys_double_quotes(self): + v1 = "mxnet>=1.7.0 ; sys_platform == \"linux\"" + req_list = [] + + parse_and_filter_versions_list(v1, req_list, {'sys_platform': 'linux'}) + ref_list = [('mxnet', '>=', '1.7.0')] + for i, v in enumerate(ref_list): + self.assertEqual(v, req_list[i]) + + def test_append_version_list_py_ver_single_quotes(self): + v1 = "mxnet>=1.7.0 ; python_version < '3.8'" + req_list = [] + + parse_and_filter_versions_list(v1, req_list, {'python_version': '3.7.1'}) + ref_list = [('mxnet', '>=', '1.7.0')] + for i, v in enumerate(ref_list): + self.assertEqual(v, req_list[i]) + + def test_append_version_list_sys_python_ver_1(self): + v1 = "mxnet>=1.7.0 ; sys_platform == 'linux' or python_version >= \"3.8\"" + req_list = [] + parse_and_filter_versions_list(v1, req_list, {'python_version': '3.8.1', 'sys_platform': 'linux'}) + ref_list = [] + for i, v in enumerate(ref_list): + self.assertEqual(v, req_list[i]) + + def test_append_version_list_sys_python_ver_2(self): + v1 = "mxnet>=1.7.0 ; sys_platform == 'linux' and python_version >= \"3.8\"" + req_list = [] + parse_and_filter_versions_list(v1, req_list, {'python_version': '3.7.1', 'sys_platform': 'linux'}) + ref_list = [] + for i, v in enumerate(ref_list): + self.assertEqual(v, req_list[i]) def test_version_check_equal(self): modules_versions_list = [('module_1', '==', '2.0', '2.0'), From 2896b3af8c1b465926cdc9db5a9ea1fb2238dc1c Mon Sep 17 00:00:00 2001 From: Jozef Daniecki Date: Thu, 6 May 2021 16:16:52 +0200 Subject: [PATCH 67/73] Convert op specification refactoring. (#5530) * Convert op specification refactoring. * Minor readability improvements. * Fixed 'category' formatting. --- docs/ops/type/Convert_1.md | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/ops/type/Convert_1.md b/docs/ops/type/Convert_1.md index ef9b0d9e952..c39d19a4587 100644 --- a/docs/ops/type/Convert_1.md +++ b/docs/ops/type/Convert_1.md @@ -2,9 +2,18 @@ **Versioned name**: *Convert-1* -**Category**: type conversion +**Category**: *Type conversion* + +**Short description**: *Convert* operation performs element-wise conversion on a given input tensor to a type specified in the *destination_type* attribute. + +**Detailed description** + +Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float `3.141592` may be round to a 32-bit int `3`. The result of unsupported conversions is undefined, e.g. convertion of negative signed integer value to any unsigned integer type. + +\f[ +o_{i} = convert(a_{i}) +\f] -**Short description**: Operation converts all elements of the input tensor to a type specified in the *"destination_type"* attribute. 
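The precision-loss caveat can be illustrated with a small NumPy sketch (illustrative only; NumPy's casting rules are used here merely to mimic the behaviour described and are not the normative definition of *Convert*):

```python
import numpy as np

# Narrowing a 32-bit float to a 32-bit int drops the fractional part.
x = np.array([3.141592], dtype=np.float32)
print(x.astype(np.int32))    # [3]

# Converting a negative signed value to an unsigned type: NumPy happens to
# wrap it modulo 2**8, but Convert leaves the result of such conversions undefined.
y = np.array([-1], dtype=np.int32)
print(y.astype(np.uint8))    # [255]
```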
**Attributes**: @@ -12,29 +21,21 @@ * **Description**: the destination type * **Range of values**: one of the supported types *T* - * **Type**: string + * **Type**: `string` * **Default value**: None * **Required**: *Yes* **Inputs** -* **1**: A tensor of type T. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise *"Convert"* operation. A tensor of *"destination_type"* type and the same shape with input tensor. +* **1**: The result of element-wise *Convert* operation. A tensor of *destination_type* type and the same shape as input tensor. **Types** -* *T*: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16 - -**Detailed description** - -Conversion from one supported type to another supported type is always allowed. User must be aware of precision loss and value change caused by range difference between two types. For example, a 32-bit float *3.141592* may be round to a 32-bit int *3*. - -\f[ -o_{i} = convert(a_{i}) -\f] +* *T*: `u8`, `u16`, `u32`, `u64`, `i8`, `i16`, `i32`, `i64`, `f16`, `f32`, `boolean`, `bf16` **Examples** From 2bb8e9facc6a575f02993d7da82fab060a030e12 Mon Sep 17 00:00:00 2001 From: Poliksenov Ivan Date: Thu, 6 May 2021 17:29:06 +0300 Subject: [PATCH 68/73] [IE][VPU]: Removed constant DDR_MAX_SIZE = 512. (#4372) * Removed constant DDR_MAX_SIZE = 512. Removed the DDR_MAX_SIZE constant as it could potentially lead to incorrect behavior of devices with a different DDR size (Prism Creek can be up to 2 GB in size). Removed the use of this constant in methods. --- .../vpu/middleend/allocator/allocator.hpp | 5 ++- .../vpu/middleend/allocator/structs.hpp | 1 - .../src/middleend/allocator/allocator.cpp | 39 ++++++------------- .../middleend/passes/adjust_data_location.cpp | 5 +-- 4 files changed, 17 insertions(+), 33 deletions(-) diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/allocator.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/allocator.hpp index f6ef6376dde..657f8f6934a 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/allocator.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/allocator.hpp @@ -96,6 +96,8 @@ public: DataSet& getCandidatesForCMX() { return _candidatesForCMX; } bool removeCMXCandidates(const Data& data); + std::size_t freeCMXMemoryAmount() const; + AllocatorForShaves& getAllocatorOfShaves() { return _allocatorOfShaves; } private: @@ -107,8 +109,7 @@ private: void extractDatas(MemoryType memType, const DataSet& from, DataVector& out) const; - std::size_t freeDDRMemoryAmount() const; - std::size_t freeCMXMemoryAmount() const; + void updateChildDataAllocation(const Data& data); private: int _modelBatchSize = 1; diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/structs.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/structs.hpp index 0883e0ae5da..d5cbc059279 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/structs.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/structs.hpp @@ -18,7 +18,6 @@ namespace vpu { // Common allocation constants // -const int DDR_MAX_SIZE = 512 * 1024 * 1024; const int CMX_SLICE_SIZE = 128 * 1024; const int DATA_ALIGNMENT = 64; const int CMX_SHAVE_BUFFER_SIZE = 100 * 1024; diff --git 
a/inference-engine/src/vpu/graph_transformer/src/middleend/allocator/allocator.cpp b/inference-engine/src/vpu/graph_transformer/src/middleend/allocator/allocator.cpp index 06df10867d8..ce796737858 100644 --- a/inference-engine/src/vpu/graph_transformer/src/middleend/allocator/allocator.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/middleend/allocator/allocator.cpp @@ -58,9 +58,7 @@ Allocator::Allocator(): _allocatorOfShaves(_cmxMemoryPool) { _memPools.emplace(MemoryType::CMX, &_cmxMemoryPool); } -namespace { - -void updateChildDataAllocation(const Data& data, int offsetLimitation) { +void Allocator::updateChildDataAllocation(const Data& data) { for (const auto& edge : data->childDataToDataEdges()) { auto parent = edge->parent(); auto child = edge->child(); @@ -77,8 +75,7 @@ void updateChildDataAllocation(const Data& data, int offsetLimitation) { } memoryOffset += byteOffset; - - IE_ASSERT(memoryOffset + child->lastElemOffset() <= offsetLimitation); + IE_ASSERT(parent->dataLocation().location != Location::CMX || memoryOffset + child->lastElemOffset() <= _maxCmxSize); } else if (edge->mode() == SharedDataMode::Reshape) { IE_ASSERT(parent->checkStrides(StridesRequirement::compact())); IE_ASSERT(child->checkStrides(StridesRequirement::compact())); @@ -88,10 +85,12 @@ void updateChildDataAllocation(const Data& data, int offsetLimitation) { child->setDataAllocationInfo({parent->dataLocation().location, memoryOffset}); - updateChildDataAllocation(child, offsetLimitation); + updateChildDataAllocation(child); } } +namespace { + int getInUse(const Data& data) { int inUse = 0; inUse += data->numConsumers(); @@ -131,7 +130,7 @@ bool Allocator::allocateData(const Data& data) { if (_allocatedData.count(data) == 0) { IE_ASSERT(data->parentDataToDataEdge() == nullptr); - updateChildDataAllocation(data, 0); + updateChildDataAllocation(data); _allocatedData.emplace(data); } @@ -156,7 +155,7 @@ bool Allocator::allocateData(const Data& data) { data->setIOInfo(Location::Input, alignVal(_inputMemOffset, DATA_ALIGNMENT)); _inputMemOffset = alignVal(_inputMemOffset, DATA_ALIGNMENT) + finalByteSize; - updateChildDataAllocation(data, DDR_MAX_SIZE); + updateChildDataAllocation(data); _allocatedData.emplace(data); } @@ -181,7 +180,7 @@ bool Allocator::allocateData(const Data& data) { data->setIOInfo(Location::Output, alignVal(_outputMemOffset, DATA_ALIGNMENT)); _outputMemOffset = alignVal(_outputMemOffset, DATA_ALIGNMENT) + finalByteSize; - updateChildDataAllocation(data, DDR_MAX_SIZE); + updateChildDataAllocation(data); _allocatedData.emplace(data); } @@ -204,7 +203,7 @@ bool Allocator::allocateData(const Data& data) { data->setDataAllocationInfo({Location::Blob, _blobMemOffset}); _blobMemOffset += finalByteSize; - updateChildDataAllocation(data, DDR_MAX_SIZE); + updateChildDataAllocation(data); _allocatedData.emplace(data); } @@ -290,8 +289,7 @@ bool Allocator::allocateData(const Data& data) { data->setDataAllocationInfo({chunk->memType == MemoryType::CMX ? Location::CMX : Location::BSS, chunk->pointer}); - auto offsetLimitation = (data->dataLocation().location == Location::CMX) ? 
_maxCmxSize : DDR_MAX_SIZE; - updateChildDataAllocation(data, offsetLimitation); + updateChildDataAllocation(data); _memChunksPerData.emplace(data, chunk); _allocatedIntermData.emplace(data); @@ -432,7 +430,7 @@ void Allocator::freeData(const Data& data, DeallocationMode mode) { _memChunksPerData[data] = ddrChunk; data->setDataAllocationInfo({Location::BSS, ddrChunk->pointer}); - updateChildDataAllocation(data, DDR_MAX_SIZE); + updateChildDataAllocation(data); break; } @@ -465,14 +463,6 @@ UsedMemory Allocator::usedMemoryAmount() const { return stats; } -std::size_t Allocator::freeDDRMemoryAmount() const { - const auto& pool = _memPools.at(MemoryType::DDR); - const auto offset = pool->curMemOffset; - VPU_THROW_UNLESS(offset <= DDR_MAX_SIZE, "Out of bound offset for next free data in DDR: size = {}, while offset = {}", DDR_MAX_SIZE, offset); - - return DDR_MAX_SIZE - offset; -} - std::size_t Allocator::freeCMXMemoryAmount() const { const auto& pool = _memPools.at(MemoryType::CMX); const auto shavesCMX = _allocatorOfShaves.getLockedSHAVEs() * CMX_SLICE_SIZE; @@ -482,10 +472,6 @@ std::size_t Allocator::freeCMXMemoryAmount() const { return _maxCmxSize - offset; } -std::size_t Allocator::freeMemoryAmount(const MemoryType& type) const { - return type == MemoryType::CMX ? freeCMXMemoryAmount() : freeDDRMemoryAmount(); -} - void Allocator::extractDatas(MemoryType memType, const DataSet& from, DataVector& out) const { for (const auto& data : from) { if (data->usage() != DataUsage::Intermediate) @@ -540,8 +526,7 @@ allocator::MemChunk* Allocator::allocateMem(MemoryType memType, int size, int in // Check free space // - const auto freeSpace = freeMemoryAmount(memType); - if (static_cast(size) > freeSpace) { + if (memType == MemoryType::CMX && static_cast(size) > freeCMXMemoryAmount()) { return nullptr; } diff --git a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/adjust_data_location.cpp b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/adjust_data_location.cpp index a0e195bd180..c734ef2a06b 100644 --- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/adjust_data_location.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/adjust_data_location.cpp @@ -232,9 +232,8 @@ void PassImpl::adjustModelForMemReqs(const Model& model) { const auto failedData = allocRes.failedData; VPU_THROW_UNLESS(!failedData || failedData->memReqs() == MemoryType::CMX, - R"(Stage "{}" of type "{}" requested {} bytes in {} for output "{}", while there is only {} bytes is free)", - failedStage->name(), failedStage->type(), calcAllocationSize(failedData), failedData->memReqs(), failedData->name(), - allocator.freeMemoryAmount(failedData->memReqs())); + R"(Request {} bytes in {} for output "{}" failed for stage "{}" of type "{}")", + calcAllocationSize(failedData), failedData->memReqs(), failedData->name(), failedStage->name(), failedStage->type()); auto allCmxDatas = allocator.getAllocatedDatas(MemoryType::CMX); env.log->trace("Got %d datas in CMX : %v", allCmxDatas.size(), allCmxDatas); From a19413c0c0e84e6a2fc2d28bf177749da3b09984 Mon Sep 17 00:00:00 2001 From: Gorokhov Dmitriy Date: Thu, 6 May 2021 19:49:24 +0300 Subject: [PATCH 69/73] [CPU] Plugin migration on ngraph (#4344) --- .../src/mkldnn_plugin/CMakeLists.txt | 14 +- .../src/mkldnn_plugin/cpu_types.h | 124 + .../emitters/jit_eltwise_emitters.cpp | 18 +- .../emitters/jit_load_store_emitters.cpp | 1 - .../emitters/jit_mkldnn_emitters.cpp | 2 +- .../src/mkldnn_plugin/mkldnn_descriptor.cpp | 12 - 
.../src/mkldnn_plugin/mkldnn_descriptor.h | 3 - .../src/mkldnn_plugin/mkldnn_edge.cpp | 3 +- .../src/mkldnn_plugin/mkldnn_exec_network.cpp | 248 +- .../src/mkldnn_plugin/mkldnn_exec_network.h | 3 +- .../mkldnn_plugin/mkldnn_extension_mngr.cpp | 7 +- .../src/mkldnn_plugin/mkldnn_extension_mngr.h | 3 +- .../src/mkldnn_plugin/mkldnn_graph.cpp | 489 +- .../src/mkldnn_plugin/mkldnn_graph.h | 35 +- .../src/mkldnn_plugin/mkldnn_graph_dumper.cpp | 317 +- .../src/mkldnn_plugin/mkldnn_graph_dumper.h | 3 - .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 1715 ++--- .../mkldnn_plugin/mkldnn_graph_optimizer.h | 28 +- .../mkldnn_plugin/mkldnn_infer_request.cpp | 71 +- .../src/mkldnn_plugin/mkldnn_memory.cpp | 79 +- .../src/mkldnn_plugin/mkldnn_node.cpp | 422 +- .../src/mkldnn_plugin/mkldnn_node.h | 361 +- .../src/mkldnn_plugin/mkldnn_plugin.cpp | 141 +- .../convert_broadcast_to_tiles.cpp | 98 + .../convert_broadcast_to_tiles.hpp | 17 + .../convert_matmul_to_fc_or_gemm.cpp | 251 + .../convert_matmul_to_fc_or_gemm.hpp | 23 + .../convert_tile_to_seq_tiles.cpp | 95 + .../convert_tile_to_seq_tiles.hpp | 17 + .../convert_to_cpu_specific_opset.hpp | 49 + .../convert_to_leaky_relu.cpp | 38 + .../convert_to_leaky_relu.hpp | 17 + .../convert_to_power_static.cpp | 131 + .../convert_to_power_static.hpp | 17 + .../convert_to_swish_cpu.cpp | 41 + .../convert_to_swish_cpu.hpp | 17 + .../ngraph_transformations/fc_bias_fusion.cpp | 70 + .../ngraph_transformations/fc_bias_fusion.hpp | 17 + .../op/fully_connected.cpp | 45 + .../op/fully_connected.hpp | 47 + .../ngraph_transformations/op/leaky_relu.cpp | 31 + .../ngraph_transformations/op/leaky_relu.hpp | 33 + .../op/power_static.cpp | 35 + .../op/power_static.hpp | 34 + .../ngraph_transformations/op/swish_cpu.cpp | 31 + .../ngraph_transformations/op/swish_cpu.hpp | 27 + .../ngraph_transformations/reshape_1d_ops.cpp | 175 + .../ngraph_transformations/reshape_1d_ops.hpp | 35 + .../reshape_fc_fusion.cpp | 80 + .../reshape_fc_fusion.hpp | 17 + .../reshape_fully_connected.cpp | 84 + .../reshape_fully_connected.hpp | 25 + .../ngraph_transformations/reshape_prelu.cpp | 35 + .../ngraph_transformations/reshape_prelu.hpp | 17 + .../rnn_sequences_optimization.cpp | 153 + .../rnn_sequences_optimization.hpp | 29 + .../src/mkldnn_plugin/nodes/argmax.cpp | 55 - .../src/mkldnn_plugin/nodes/argmax_imp.cpp | 417 -- .../src/mkldnn_plugin/nodes/argmax_imp.hpp | 27 - .../src/mkldnn_plugin/nodes/base.hpp | 144 +- .../mkldnn_plugin/nodes/batch_to_space.cpp | 244 - .../src/mkldnn_plugin/nodes/broadcast.cpp | 135 - .../src/mkldnn_plugin/nodes/bucketize.cpp | 65 +- .../nodes/ctc_greedy_decoder.cpp | 77 +- .../nodes/ctc_greedy_decoder_seq_len.cpp | 98 +- .../src/mkldnn_plugin/nodes/ctc_loss.cpp | 78 +- .../src/mkldnn_plugin/nodes/cum_sum.cpp | 87 +- .../mkldnn_plugin/nodes/detectionoutput.cpp | 170 +- .../nodes/detectionoutput_onnx.cpp | 74 +- .../nodes/embedding_bag_offset_sum.cpp | 247 - .../nodes/embedding_bag_packed_sum.cpp | 67 - .../mkldnn_plugin/nodes/embedding_bag_sum.cpp | 209 - .../mkldnn_plugin/nodes/embedding_bag_sum.hpp | 63 - .../nodes/embedding_segments_sum.cpp | 134 - .../nodes/extract_image_patches.cpp | 110 +- .../nodes/extract_image_patches.hpp | 13 +- .../src/mkldnn_plugin/nodes/fill.cpp | 124 - .../src/mkldnn_plugin/nodes/gather.cpp | 154 - .../mkldnn_plugin/nodes/gather_elements.cpp | 149 - .../src/mkldnn_plugin/nodes/gather_nd.cpp | 230 - .../src/mkldnn_plugin/nodes/gather_tree.cpp | 101 +- .../src/mkldnn_plugin/nodes/grn.cpp | 39 +- .../src/mkldnn_plugin/nodes/list.hpp | 12 +- 
.../src/mkldnn_plugin/nodes/list_tbl.hpp | 32 +- .../src/mkldnn_plugin/nodes/log_softmax.cpp | 44 +- .../src/mkldnn_plugin/nodes/math.cpp | 258 +- .../nodes/mkldnn_batch_to_space_node.cpp | 237 + .../nodes/mkldnn_batch_to_space_node.h | 40 + .../nodes/mkldnn_batchnorm_node.cpp | 281 - .../nodes/mkldnn_batchnorm_node.h | 44 - .../nodes/mkldnn_bin_conv_node.cpp | 136 +- .../nodes/mkldnn_bin_conv_node.h | 10 +- .../nodes/mkldnn_broadcast_node.cpp | 133 + .../nodes/mkldnn_broadcast_node.h | 35 + .../nodes/mkldnn_concat_node.cpp | 52 +- .../mkldnn_plugin/nodes/mkldnn_concat_node.h | 3 +- .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp | 597 +- .../mkldnn_plugin/nodes/mkldnn_conv_node.h | 58 +- .../nodes/mkldnn_convert_node.cpp | 71 +- .../mkldnn_plugin/nodes/mkldnn_convert_node.h | 8 +- .../nodes/mkldnn_deconv_node.cpp | 217 +- .../mkldnn_plugin/nodes/mkldnn_deconv_node.h | 16 +- .../nodes/mkldnn_def_conv_node.cpp | 70 +- .../nodes/mkldnn_def_conv_node.h | 4 +- .../nodes/mkldnn_depth_to_space_node.cpp | 75 +- .../nodes/mkldnn_depth_to_space_node.h | 4 +- .../nodes/mkldnn_eltwise_node.cpp | 926 +-- .../mkldnn_plugin/nodes/mkldnn_eltwise_node.h | 84 +- .../mkldnn_embedding_bag_offset_sum_node.cpp | 132 + .../mkldnn_embedding_bag_offset_sum_node.h | 43 + .../mkldnn_embedding_bag_packed_sum_node.cpp | 99 + .../mkldnn_embedding_bag_packed_sum_node.h | 38 + .../nodes/mkldnn_embedding_bag_sum_node.cpp | 141 + .../nodes/mkldnn_embedding_bag_sum_node.h | 52 + .../mkldnn_embedding_segments_sum_node.cpp | 134 + .../mkldnn_embedding_segments_sum_node.h | 45 + ...node.cpp => mkldnn_fake_quantize_node.cpp} | 562 +- ...ize_node.h => mkldnn_fake_quantize_node.h} | 25 +- .../nodes/mkldnn_fullyconnected_node.cpp | 205 +- .../nodes/mkldnn_fullyconnected_node.h | 20 +- .../nodes/mkldnn_gather_elements_node.cpp | 145 + .../nodes/mkldnn_gather_elements_node.h | 43 + .../nodes/mkldnn_gather_nd_node.cpp | 218 + .../nodes/mkldnn_gather_nd_node.h | 45 + .../nodes/mkldnn_gather_node.cpp | 137 + .../mkldnn_plugin/nodes/mkldnn_gather_node.h | 55 + .../nodes/mkldnn_generic_node.cpp | 57 +- .../mkldnn_plugin/nodes/mkldnn_generic_node.h | 7 +- .../mkldnn_plugin/nodes/mkldnn_input_node.cpp | 105 +- .../mkldnn_plugin/nodes/mkldnn_input_node.h | 10 +- .../nodes/mkldnn_interpolate_node.cpp | 395 +- .../nodes/mkldnn_interpolate_node.h | 24 +- .../mkldnn_plugin/nodes/mkldnn_lrn_node.cpp | 137 +- .../src/mkldnn_plugin/nodes/mkldnn_lrn_node.h | 13 +- ...n_gemm_node.cpp => mkldnn_matmul_node.cpp} | 174 +- ...kldnn_gemm_node.h => mkldnn_matmul_node.h} | 12 +- .../nodes/mkldnn_memory_node.cpp | 55 +- .../nodes/mkldnn_memory_node.hpp | 12 +- .../mkldnn_plugin/nodes/mkldnn_mvn_node.cpp | 280 +- .../src/mkldnn_plugin/nodes/mkldnn_mvn_node.h | 28 +- .../nodes/mkldnn_normalize_node.cpp | 617 +- .../nodes/mkldnn_normalize_node.h | 40 +- .../nodes/mkldnn_one_hot_node.cpp | 147 + .../mkldnn_plugin/nodes/mkldnn_one_hot_node.h | 63 + .../mkldnn_plugin/nodes/mkldnn_pad_node.cpp | 141 +- .../src/mkldnn_plugin/nodes/mkldnn_pad_node.h | 12 +- .../nodes/mkldnn_pooling_node.cpp | 99 +- .../mkldnn_plugin/nodes/mkldnn_pooling_node.h | 5 +- .../nodes/mkldnn_psroi_pooling_node.cpp | 558 ++ .../nodes/mkldnn_psroi_pooling_node.h | 84 + .../nodes/mkldnn_reduce_node.cpp | 410 +- .../mkldnn_plugin/nodes/mkldnn_reduce_node.h | 30 +- .../nodes/mkldnn_reference_node.cpp | 85 + .../nodes/mkldnn_reference_node.h | 30 + .../nodes/mkldnn_region_yolo_node.cpp | 422 ++ .../nodes/mkldnn_region_yolo_node.h | 76 + .../nodes/mkldnn_reorder_node.cpp | 8 +- 
.../mkldnn_plugin/nodes/mkldnn_reorder_node.h | 5 +- .../nodes/mkldnn_reshape_node.cpp | 12 +- .../mkldnn_plugin/nodes/mkldnn_reshape_node.h | 2 +- .../src/mkldnn_plugin/nodes/mkldnn_rnn.cpp | 806 +- .../src/mkldnn_plugin/nodes/mkldnn_rnn.h | 27 +- .../nodes/mkldnn_roi_align_node.cpp | 76 +- .../nodes/mkldnn_roi_align_node.h | 16 +- .../nodes/mkldnn_roi_pooling_node.cpp | 71 +- .../nodes/mkldnn_roi_pooling_node.h | 15 +- .../mkldnn_plugin/nodes/mkldnn_roll_node.cpp | 109 +- .../mkldnn_plugin/nodes/mkldnn_roll_node.h | 4 +- .../nodes/mkldnn_scatter_update_node.cpp | 101 +- .../nodes/mkldnn_scatter_update_node.h | 6 +- .../nodes/mkldnn_select_node.cpp | 222 + .../mkldnn_plugin/nodes/mkldnn_select_node.h | 51 + .../nodes/mkldnn_softmax_node.cpp | 29 +- .../mkldnn_plugin/nodes/mkldnn_softmax_node.h | 4 +- .../nodes/mkldnn_space_to_batch_node.cpp | 242 + .../nodes/mkldnn_space_to_batch_node.h | 40 + .../nodes/mkldnn_space_to_depth_node.cpp | 75 +- .../nodes/mkldnn_space_to_depth_node.h | 4 +- .../mkldnn_plugin/nodes/mkldnn_split_node.cpp | 102 +- .../mkldnn_plugin/nodes/mkldnn_split_node.h | 5 +- .../nodes/mkldnn_strided_slice_node.cpp | 112 +- .../nodes/mkldnn_strided_slice_node.h | 4 +- .../nodes/mkldnn_tensoriterator_node.cpp | 278 +- .../nodes/mkldnn_tensoriterator_node.h | 30 +- .../mkldnn_plugin/nodes/mkldnn_tile_node.cpp | 99 +- .../mkldnn_plugin/nodes/mkldnn_tile_node.h | 12 +- ...ute_node.cpp => mkldnn_transpose_node.cpp} | 137 +- ...permute_node.h => mkldnn_transpose_node.h} | 17 +- .../nodes/non_max_suppression.cpp | 177 +- .../src/mkldnn_plugin/nodes/one_hot.cpp | 168 - .../src/mkldnn_plugin/nodes/powerfile.cpp | 63 - .../src/mkldnn_plugin/nodes/priorbox.cpp | 344 - .../nodes/priorbox_clustered.cpp | 123 - .../nodes/priorgridgenerator_onnx.cpp | 59 +- .../src/mkldnn_plugin/nodes/proposal.cpp | 143 +- .../src/mkldnn_plugin/nodes/proposal_onnx.cpp | 49 +- .../src/mkldnn_plugin/nodes/psroi.cpp | 523 -- .../src/mkldnn_plugin/nodes/range.cpp | 98 +- .../src/mkldnn_plugin/nodes/region_yolo.cpp | 446 -- .../src/mkldnn_plugin/nodes/reorg_yolo.cpp | 39 +- .../mkldnn_plugin/nodes/reverse_sequence.cpp | 77 +- .../nodes/roifeatureextractor_onnx.cpp | 44 +- .../src/mkldnn_plugin/nodes/select.cpp | 229 - .../mkldnn_plugin/nodes/shuffle_channels.cpp | 84 +- .../src/mkldnn_plugin/nodes/simplernms.cpp | 338 - .../mkldnn_plugin/nodes/space_to_batch.cpp | 254 - .../nodes/sparse_fill_empty_rows.cpp | 227 - .../nodes/sparse_segment_reduce.cpp | 189 - .../mkldnn_plugin/nodes/sparse_to_dense.cpp | 137 - .../nodes/sparse_weighted_reduce.cpp | 232 - .../src/mkldnn_plugin/nodes/squeeze.cpp | 63 - .../src/mkldnn_plugin/nodes/topk.cpp | 108 +- .../src/mkldnn_plugin/nodes/topkrois_onnx.cpp | 45 +- .../src/mkldnn_plugin/nodes/unique.cpp | 192 - .../src/mkldnn_plugin/nodes/unsqueeze.cpp | 58 - .../src/mkldnn_plugin/utils/cpu_utils.hpp | 94 + .../src/mkldnn_plugin/utils/general_utils.h | 30 + .../src/mkldnn_plugin/utils/ngraph_utils.hpp | 42 + .../cpu/bfloat16/bf16_network_restoring.cpp | 2 + .../plugin/cpu/bfloat16/concat_in_place.cpp | 4 +- .../plugin/cpu/bfloat16/conv_add.cpp | 4 +- .../plugin/cpu/bfloat16/conv_conv.cpp | 4 +- .../plugin/cpu/bfloat16/conv_dwconv_relu.cpp | 4 +- .../cpu/bfloat16/conv_eltwise_depthwise.cpp | 2 + .../conv_relu_pool_conv_relu_pool.cpp | 4 +- .../plugin/cpu/bfloat16/elt_max.cpp | 2 + .../functional/plugin/cpu/bfloat16/elt_x3.cpp | 2 + .../cpu/bfloat16/faster_100_5_1_1_conv.cpp | 4 +- .../plugin/cpu/bfloat16/gather_multiply.cpp | 2 + .../gather_x2_add_mul_relu_concat_matmul.cpp | 4 
+- .../plugin/cpu/bfloat16/memory_conv.cpp | 2 + .../bfloat16/mobilenet_ssd_with_branching.cpp | 2 + .../bfloat16/scaleshift_conv_eltwise_conv.cpp | 6 +- .../scaleshift_conv_eltwise_relu_conv.cpp | 6 +- .../scaleshift_conv_eltwise_scaleshift.cpp | 4 +- .../cpu/bfloat16/scaleshift_conv_elu_conv.cpp | 4 +- .../cpu/bfloat16/scaleshift_conv_relu.cpp | 6 +- .../scaleshift_conv_x2_concat_relu.cpp | 4 +- .../bfloat16/scaleshift_conv_x2_eltwise.cpp | 4 +- .../scaleshift_conv_x2_mixed1_eltwise.cpp | 6 +- .../scaleshift_conv_x2_mixed2_eltwise.cpp | 6 +- .../bfloat16/scaleshift_conv_x3_eltwise.cpp | 4 +- .../scaleshift_x2_conv_x2_eltwise.cpp | 6 +- .../scaleshift_x3_conv_eltwise_relu.cpp | 6 +- .../cpu/bfloat16/tail_fp32_optimization.cpp | 4 +- .../plugin/cpu/bfloat16/topk_inputs_i32.cpp | 4 +- .../behavior/add_output.cpp | 22 - .../behavior/memory_states.cpp | 21 - .../behavior/set_blob.cpp | 1 + .../runtime_precision.cpp | 2 +- .../single_layer_tests/activation.cpp | 14 +- .../single_layer_tests/convolution.cpp | 29 + .../single_layer_tests/lrn.cpp | 78 +- .../single_layer_tests/tile.cpp | 12 +- .../skip_tests_config.cpp | 20 +- .../subgraph_tests/split_concat_memory.cpp | 4 - .../cpu/single_layer_tests/activation.cpp | 35 +- .../cpu/single_layer_tests/batch_to_space.cpp | 4 +- .../convert_to_plugin_specific_node.cpp | 120 + .../cpu/single_layer_tests/convolution.cpp | 17 +- .../plugin/cpu/single_layer_tests/eltwise.cpp | 97 +- .../cpu/single_layer_tests/fake_quantize.cpp | 4 +- .../single_layer_tests/gather_elements.cpp | 2 +- .../single_layer_tests/group_convolution.cpp | 11 +- .../cpu/single_layer_tests/gru_sequence.cpp | 23 +- .../cpu/single_layer_tests/interpolate.cpp | 8 +- .../cpu/single_layer_tests/lstm_sequence.cpp | 29 +- .../plugin/cpu/single_layer_tests/mat_mul.cpp | 205 + .../cpu/single_layer_tests/normalize.cpp | 42 +- .../plugin/cpu/single_layer_tests/one_hot.cpp | 7 +- .../cpu/single_layer_tests/psroi_pooling.cpp | 2 +- .../cpu/single_layer_tests/reduce_ops.cpp | 48 +- .../cpu/single_layer_tests/region_yolo.cpp | 2 +- .../cpu/single_layer_tests/rnn_sequence.cpp | 4 +- .../plugin/cpu/single_layer_tests/softmax.cpp | 2 +- .../cpu/single_layer_tests/space_to_batch.cpp | 4 +- .../{permute.cpp => transpose.cpp} | 20 +- .../subgraph_tests/include/conv_concat.hpp | 25 - ...reorder.hpp => fuse_transpose_reorder.hpp} | 12 +- .../src/add_conver_to_reorder.cpp | 6 +- .../cpu/subgraph_tests/src/conv3d_reshape.cpp | 98 + .../subgraph_tests/src/conv_maxpool_activ.cpp | 85 + ...reorder.cpp => fuse_transpose_reorder.cpp} | 112 +- .../cpu/subgraph_tests/src/reshape_fc.cpp | 98 + .../plugin/cpu/test_utils/cpu_test_utils.hpp | 23 + .../cpu/test_utils/fusing_test_utils.cpp | 11 +- .../cpu/test_utils/fusing_test_utils.hpp | 152 +- .../single_layer_tests/pooling.cpp | 20 +- .../include/behavior/exec_graph_info.hpp | 2 +- .../shared/include/behavior/set_blob.hpp | 3 +- .../include/single_layer_tests/loop.hpp | 2 + .../subgraph_tests/split_concat_memory.hpp | 2 + .../plugin/shared/src/behavior/add_output.cpp | 3 + .../src/behavior/invalid_cases/proposal.cpp | 2 + .../plugin/shared/src/behavior/set_blob.cpp | 24 +- .../mat_mul_transformation.cpp | 2 + .../unit/cpu/mkldnn_memory_desc_test.cpp | 28 +- .../config_param_test/config_param_test.cpp | 52 - .../functional/mkldnn/dummy.cpp | 4 + .../extensions_tests/extensions_test.cpp | 274 - .../network_tests/ngraph_network_test.cpp | 359 - .../regression_tests/regression_reference.cpp | 13 - .../single_layer_tests.cpp | 233 - .../graph_tools_functional_tests.cpp 
| 26 - .../common_dyn_batch_regression.cpp | 16 - .../input_tests/parser_tests.cpp | 36 - .../io_blob_tests/cropResize_tests.cpp | 250 - .../io_blob_tests/dims_tests.cpp | 7 - .../io_blob_tests/layout_tests.cpp | 15 - .../lstm/lstm_cell_test.cpp | 7 - .../lstm/lstm_ir_test.cpp | 10 - .../lstm/rnn_seq_test.cpp | 7 - .../network_tests/network_test.cpp | 202 - ...ecision_transformer_single_layer_tests.cpp | 862 --- .../single_layer_tests/argmax_tests.cpp | 211 - .../single_layer_tests/concat_tests.cpp | 277 - .../single_layer_tests/conv_int8_tests.cpp | 363 - .../mkldnn/single_layer_tests/conv_tests.cpp | 429 -- .../single_layer_tests/conv_tests_int8.cpp | 452 -- .../single_layer_tests/detectionout_tests.cpp | 189 - .../single_layer_tests/fullycon_tests.cpp | 185 - .../mkldnn_batchnorm_tests.cpp | 175 - .../mkldnn_deconv_tests.cpp | 231 - .../mkldnn_logistic_tests.cpp | 139 - .../single_layer_tests/mkldnn_power_tests.cpp | 152 - .../mkldnn_roipooling_tests.cpp | 101 - .../mkldnn_scaleshift_tests.cpp | 170 - .../mkldnn_simplernms_tests.cpp | 151 - .../mkldnn/single_layer_tests/norm_tests.cpp | 182 - .../single_layer_tests/pooling_tests.cpp | 213 - .../single_layer_tests/priorbox_tests.cpp | 369 - .../single_layer_tests/region_yolo_tests.cpp | 234 - .../mkldnn/snippet_test/multi_out_test.cpp | 125 - .../mkldnn/snippet_test/tripple_test.cpp | 118 - .../functional/mkldnn/test_model_repo.cpp | 17 - .../tests_deprecated/unit/CMakeLists.txt | 23 - .../mkldnn/constant_propagation_test.cpp | 304 - .../unit/engines/mkldnn/convert_desc_test.cpp | 75 - .../unit/engines/mkldnn/dummy.cpp | 4 + .../unit/engines/mkldnn/dump_test.cpp | 138 - .../unit/engines/mkldnn/dumper_test.cpp | 98 - .../layers/extensions/broadcast_tests.cpp | 276 - .../layers/extensions/bucketize_tests.cpp | 249 - .../graph/layers/extensions/fake_layer.cpp | 131 - .../graph/layers/extensions/fill_tests.cpp | 194 - .../graph/layers/extensions/gather_tests.cpp | 684 -- .../layers/extensions/graph_generic_test.cpp | 1521 ---- .../layers/extensions/log_softmax_tests.cpp | 273 - .../graph/layers/extensions/math_tests.cpp | 319 - .../graph/layers/extensions/mvn_tests.cpp | 646 -- .../extensions/non_max_suppression_tests.cpp | 568 -- .../layers/extensions/normalize_tests.cpp | 640 -- .../graph/layers/extensions/onehot_tests.cpp | 854 --- .../graph/layers/extensions/range_tests.cpp | 247 - .../graph/layers/extensions/reduce_tests.cpp | 535 -- .../extensions/reverse_sequence_tests.cpp | 265 - .../graph/layers/extensions/scatter_tests.cpp | 203 - .../graph/layers/extensions/select_tests.cpp | 280 - .../extensions/shuffle_channels_tests.cpp | 205 - .../sparse_fill_empty_rows_tests.cpp | 545 -- .../sparse_segment_reduce_tests.cpp | 302 - .../extensions/sparse_to_dense_tests.cpp | 279 - .../sparse_weighted_reduce_tests.cpp | 416 - .../layers/extensions/strided_slice_tests.cpp | 487 -- .../graph/layers/extensions/topk_tests.cpp | 519 -- .../graph/layers/extensions/unique_tests.cpp | 370 - .../layers/internal/graph_activation_test.cpp | 422 -- .../graph_batchnorm_scaleshift_test.cpp | 344 - .../layers/internal/graph_batchnorm_test.cpp | 312 - .../layers/internal/graph_concat_test.cpp | 1030 --- .../graph/layers/internal/graph_conv_test.cpp | 531 -- .../layers/internal/graph_deconv_test.cpp | 555 -- .../layers/internal/graph_depthwise_test.cpp | 456 -- .../layers/internal/graph_eltwise_test.cpp | 379 - .../internal/graph_fullyconnected_test.cpp | 337 - .../graph/layers/internal/graph_gemm_test.cpp | 662 -- .../layers/internal/graph_input_test.cpp | 471 
-- .../layers/internal/graph_leaks_test.cpp | 271 - .../graph/layers/internal/graph_lrn_test.cpp | 301 - .../layers/internal/graph_permute_test.cpp | 635 -- .../layers/internal/graph_pooling_test.cpp | 504 -- .../layers/internal/graph_power_test.cpp | 332 - .../graph/layers/internal/graph_relu_test.cpp | 244 - .../layers/internal/graph_reorder_test.cpp | 256 - .../layers/internal/graph_reshape_test.cpp | 304 - .../internal/graph_roi_pooling_test.cpp | 313 - .../layers/internal/graph_simplernms_test.cpp | 473 -- .../layers/internal/graph_softmax_test.cpp | 419 -- .../layers/internal/graph_split_test.cpp | 501 -- .../graph/layers/internal/graph_tile_test.cpp | 281 - .../structure/graph_conv_concat_tests.cpp | 267 - .../graph_conv_depthwise_fusing_test.cpp | 337 - .../structure/graph_deconv_concat_tests.cpp | 397 - .../structure/graph_dw_conv_fusing_test.cpp | 334 - .../structure/graph_optimization_test.cpp | 421 -- .../graph/structure/graph_structure_test.cpp | 6671 ----------------- .../unit/engines/mkldnn/graph/test_graph.hpp | 362 - .../engines/mkldnn/mkldnn_primitive_test.cpp | 54 - .../unit/engines/mkldnn/test_layers.cpp | 183 - .../include/ngraph/op/util/attr_types.hpp | 2 + ngraph/core/src/op/binary_convolution.cpp | 2 +- ngraph/core/src/op/depth_to_space.cpp | 2 +- ngraph/core/src/op/interpolate.cpp | 5 +- ngraph/core/src/op/roi_align.cpp | 3 +- ngraph/core/src/op/space_to_depth.cpp | 2 +- ngraph/core/src/pass/convert_precision.cpp | 5 + ngraph/python/tests/__init__.py | 5 +- ngraph/python/tests/test_onnx/test_backend.py | 5 +- .../tests/test_onnx/test_ops_convpool.py | 4 +- ngraph/test/onnx/onnx_import.in.cpp | 3 +- .../desktop_references_config.xml | 12 +- 411 files changed, 14378 insertions(+), 51599 deletions(-) create mode 100644 inference-engine/src/mkldnn_plugin/cpu_types.h create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_broadcast_to_tiles.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_broadcast_to_tiles.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_tile_to_seq_tiles.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_tile_to_seq_tiles.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_leaky_relu.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_leaky_relu.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_power_static.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_power_static.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_swish_cpu.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_swish_cpu.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/fc_bias_fusion.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/fc_bias_fusion.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/op/fully_connected.cpp create mode 100644 
inference-engine/src/mkldnn_plugin/ngraph_transformations/op/fully_connected.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/op/leaky_relu.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/op/leaky_relu.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/op/power_static.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/op/power_static.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/op/swish_cpu.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/op/swish_cpu.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_1d_ops.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_1d_ops.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.hpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/argmax.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/argmax_imp.hpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/embedding_bag_offset_sum.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/embedding_bag_packed_sum.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/embedding_bag_sum.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/embedding_bag_sum.hpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/embedding_segments_sum.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/fill.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/gather.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/gather_elements.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/gather_nd.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.h create mode 100644 
inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.h rename inference-engine/src/mkldnn_plugin/nodes/{mkldnn_quantize_node.cpp => mkldnn_fake_quantize_node.cpp} (81%) rename inference-engine/src/mkldnn_plugin/nodes/{mkldnn_quantize_node.h => mkldnn_fake_quantize_node.h} (89%) create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h rename inference-engine/src/mkldnn_plugin/nodes/{mkldnn_gemm_node.cpp => mkldnn_matmul_node.cpp} (64%) rename inference-engine/src/mkldnn_plugin/nodes/{mkldnn_gemm_node.h => mkldnn_matmul_node.h} (77%) create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.h create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp create mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.h rename inference-engine/src/mkldnn_plugin/nodes/{mkldnn_permute_node.cpp => mkldnn_transpose_node.cpp} (79%) rename inference-engine/src/mkldnn_plugin/nodes/{mkldnn_permute_node.h => mkldnn_transpose_node.h} (65%) delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/one_hot.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/powerfile.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/priorbox.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/priorbox_clustered.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/psroi.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/select.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/simplernms.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/space_to_batch.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp delete mode 100644 
inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/unique.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp create mode 100644 inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp create mode 100644 inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp delete mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/add_output.cpp delete mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/memory_states.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/single_layer_tests/convert_to_plugin_specific_node.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp rename inference-engine/tests/functional/plugin/cpu/single_layer_tests/{permute.cpp => transpose.cpp} (88%) rename inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/{fuse_permute_reorder.hpp => fuse_transpose_reorder.hpp} (69%) create mode 100644 inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/conv3d_reshape.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/conv_maxpool_activ.cpp rename inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/{fuse_permute_reorder.cpp => fuse_transpose_reorder.cpp} (62%) create mode 100644 inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/config_param_test/config_param_test.cpp create mode 100644 inference-engine/tests_deprecated/functional/mkldnn/dummy.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/extensions_tests/extensions_test.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/network_tests/ngraph_network_test.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/regression_tests/regression_reference.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/common_single_layer_tests/single_layer_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/graph_tools/graph_tools_functional_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/inference_engine_regression_tests/common_dyn_batch_regression.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/input_tests/parser_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/cropResize_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/dims_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/layout_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/lstm_cell_test.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/lstm_ir_test.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/rnn_seq_test.cpp delete mode 100644 
inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/network_tests/network_test.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/transformations/low_precision_transformer_single_layer_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/argmax_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/concat_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_int8_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests_int8.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/detectionout_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/fullycon_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_batchnorm_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_deconv_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_logistic_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_power_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_roipooling_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_scaleshift_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_simplernms_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/norm_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/pooling_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/priorbox_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/region_yolo_tests.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/snippet_test/multi_out_test.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/snippet_test/tripple_test.cpp delete mode 100644 inference-engine/tests_deprecated/functional/mkldnn/test_model_repo.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/constant_propagation_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/convert_desc_test.cpp create mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/dummy.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/dump_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/dumper_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/bucketize_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp delete mode 100644 
inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/log_softmax_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/normalize_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/select_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_segment_reduce_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_to_dense_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_weighted_reduce_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp delete mode 100644 
inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_concat_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tests.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/mkldnn_primitive_test.cpp delete mode 100644 inference-engine/tests_deprecated/unit/engines/mkldnn/test_layers.cpp diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index fa3ff943360..388a600a697 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -54,21 +54,16 @@ if(SELECTIVE_BUILD STREQUAL "ON") endif() endif() -target_link_libraries(${TARGET_NAME} PRIVATE mkldnn inference_engine inference_engine_legacy - inference_engine_transformations inference_engine_lp_transformations) +target_link_libraries(${TARGET_NAME} PRIVATE mkldnn + inference_engine + inference_engine_transformations + inference_engine_lp_transformations) target_include_directories(${TARGET_NAME} PRIVATE 
$) # Cross compiled function # TODO: The same for proposal, proposalONNX, topk -cross_compiled_file(${TARGET_NAME} - ARCH AVX512F AVX2 SSE42 ANY - nodes/argmax_imp.cpp - API nodes/argmax_imp.hpp - NAME arg_max_execute - NAMESPACE InferenceEngine::Extensions::Cpu::XARCH -) cross_compiled_file(${TARGET_NAME} ARCH AVX2 ANY nodes/proposal_imp.cpp @@ -85,7 +80,6 @@ add_library(${TARGET_NAME}_obj OBJECT ${SOURCES} ${HEADERS}) target_link_libraries(${TARGET_NAME}_obj PUBLIC mkldnn) target_include_directories(${TARGET_NAME}_obj PRIVATE $ - $ $ $ $ diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.h b/inference-engine/src/mkldnn_plugin/cpu_types.h new file mode 100644 index 00000000000..860353f12d2 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_types.h @@ -0,0 +1,124 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +namespace MKLDNNPlugin { + +enum Algorithm { + Undefined, + + // Pooling algorithms + PoolingMax, + PoolingAvg, + + // Convolution algorithms + ConvolutionCommon, + ConvolutionGrouped, + + // Convolution algorithms + DeconvolutionCommon, + DeconvolutionGrouped, + + // Elementwise algorithms + EltwiseAdd, + EltwiseMultiply, + EltwiseSubtract, + EltwiseDivide, + EltwiseFloorMod, + EltwiseMod, + EltwiseMaximum, + EltwiseMinimum, + EltwiseSquaredDifference, + EltwisePowerDynamic, + EltwisePowerStatic, + EltwiseMulAdd, + EltwiseEqual, + EltwiseNotEqual, + EltwiseGreater, + EltwiseGreaterEqual, + EltwiseLess, + EltwiseLessEqual, + EltwiseLogicalAnd, + EltwiseLogicalOr, + EltwiseLogicalXor, + EltwiseLogicalNot, + EltwiseRelu, + EltwiseGelu, + EltwiseElu, + EltwiseTanh, + EltwiseSigmoid, + EltwiseAbs, + EltwiseSqrt, + EltwiseSoftRelu, + EltwiseExp, + EltwiseClamp, + EltwiseSwish, + EltwisePrelu, + EltwiseMish, + EltwiseHswish, + EltwiseHsigmoid, + EltwiseRoundHalfToEven, + EltwiseRoundHalfAwayFromZero, + EltwiseErf, + + // FakeQuantize algorithms + FQCommon, + FQQuantization, + FQBinarization, + + // ROIPooling algorithms + ROIPoolingMax, + ROIPoolingBilinear, + + // ROIAlign algorithms + ROIAlignMax, + ROIAlignAvg, + + // PSROIPooling algorithms + PSROIPoolingAverage, + PSROIPoolingBilinear, + PSROIPoolingBilinearDeformable, + + // Reduce algorithms + ReduceL1, + ReduceL2, + ReduceAnd, + ReduceOr, + ReduceMax, + ReduceMean, + ReduceMin, + ReduceProd, + ReduceSum, + ReduceLogSum, + ReduceLogSumExp, + ReduceSumSquare, + + // Math algorithms + MathAbs, + MathAcos, + MathAcosh, + MathAsin, + MathAsinh, + MathAtan, + MathAtanh, + MathCeiling, + MathCos, + MathCosh, + MathErf, + MathFloor, + MathHardSigmoid, + MathLog, + MathNegative, + MathReciprocal, + MathSelu, + MathSign, + MathSin, + MathSinh, + MathSoftPlus, + MathSoftsign, + MathTan +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_eltwise_emitters.cpp b/inference-engine/src/mkldnn_plugin/emitters/jit_eltwise_emitters.cpp index 0bb8b152be0..29c17d3f172 100644 --- a/inference-engine/src/mkldnn_plugin/emitters/jit_eltwise_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/emitters/jit_eltwise_emitters.cpp @@ -4,9 +4,8 @@ #include "jit_eltwise_emitters.hpp" #include -#include "legacy/ie_layers.h" - #include +#include using namespace InferenceEngine; using namespace mkldnn::impl::utils; @@ -1305,15 +1304,16 @@ jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_ prepare_table(); } + jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* 
node, Precision exec_prc) : jit_emitter(host, host_isa, node, exec_prc) { - auto *powerLayer = dynamic_cast(node->getCnnLayer().get()); - if (powerLayer == nullptr) - IE_THROW() << "Cannot convert power layer."; - - power = powerLayer->power; - scale = powerLayer->scale; - shift = powerLayer->offset; + const MKLDNNEltwiseNode *powerNode = dynamic_cast(node); + if (powerNode == nullptr) { + IE_THROW() << "Can't cast to MKLDNNEltwiseNode"; + } + power = powerNode->getAlpha(); + scale = powerNode->getBeta(); + shift = powerNode->getGamma(); prepare_table(); } diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp index 3907bf5b9a0..276791b7d7d 100644 --- a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp @@ -4,7 +4,6 @@ #include "jit_emitter.hpp" #include "jit_load_store_emitters.hpp" -#include "legacy/ie_layers.h" #include #include "utils/bfloat16.hpp" diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_mkldnn_emitters.cpp b/inference-engine/src/mkldnn_plugin/emitters/jit_mkldnn_emitters.cpp index af1555694b8..02371895f59 100644 --- a/inference-engine/src/mkldnn_plugin/emitters/jit_mkldnn_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/emitters/jit_mkldnn_emitters.cpp @@ -25,7 +25,7 @@ jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, InferenceEngine::Precision exec_prc) : jit_emitter(host, host_isa, node, exec_prc) { auto eltwiseNode = dynamic_cast(node); - kind = static_cast(eltwiseNode->getAlgorithm()); + kind = static_cast(eltwiseNode->getMKLDNNAlgorithm()); alpha = eltwiseNode->getAlpha(); beta = eltwiseNode->getBeta(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp index 10ed3d432eb..99002688e90 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp @@ -23,18 +23,6 @@ size_t MKLDNNDescriptor::outputNumbers() const { return 1; } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { - this->desc.reset(new DescFwdImpl(desc)); -} - -MKLDNNDescriptor::operator std::shared_ptr() { - auto typeDesc = std::dynamic_pointer_cast>(desc); - if (typeDesc == nullptr) { - IE_THROW() << "Cannot cast descriptor!"; - } - return typeDesc->getPtr(); -} - MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h index e73b505790a..5025e1a025d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h @@ -10,9 +10,6 @@ class MKLDNNDescriptor { public: - explicit MKLDNNDescriptor(std::shared_ptr desc); - operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); operator std::shared_ptr(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index 4094c29ca8e..f09ae78bfef 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -6,6 +6,7 @@ #include "mkldnn_node.h" #include "mkldnn_extension_utils.h" #include 
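For readers following the jit_power_static_emitter change above: the emitter now reads its parameters from the Eltwise node (power = getAlpha(), scale = getBeta(), shift = getGamma()) instead of the legacy PowerLayer. Below is a minimal, self-contained sketch of the element-wise operation those three parameters describe, assuming the conventional Power semantics y = (scale * x + shift) ^ power; the helper name is illustrative and not part of the patch.

#include <cmath>
#include <cstdio>

// Reference semantics for the static power operation configured above:
//   y = pow(scale * x + shift, power)
// where power = getAlpha(), scale = getBeta(), shift = getGamma() on the Eltwise node.
static float power_static_ref(float x, float power, float scale, float shift) {
    return std::pow(scale * x + shift, power);
}

int main() {
    // power = 2, scale = 1, shift = 0 simply squares the input: prints 9.000000
    std::printf("%f\n", power_static_ref(3.0f, 2.0f, 1.0f, 0.0f));
    return 0;
}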
+#include "utils/cpu_utils.hpp" using namespace mkldnn; namespace MKLDNNPlugin { @@ -603,7 +604,7 @@ InferenceEngine::Blob::Ptr MKLDNNEdge::getBlob() { else desc = InferenceEngine::TensorDesc(desc.getPrecision(), dims.ToSizeVector(), desc.getBlockingDesc()); - return make_blob_with_precision(desc, memoryPtr->GetData()); + return isEmptyTensorDesc(desc) ? make_blob_with_precision(desc) : make_blob_with_precision(desc, memoryPtr->GetData()); } void MKLDNNEdge::sharedMemFrom(const MKLDNNEdgePtr &edge) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp index 8122cf1054f..ac8b9c32e59 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp @@ -4,7 +4,6 @@ #include #include -#include #include "mkldnn_exec_network.h" #include "mkldnn_async_infer_request.h" @@ -12,8 +11,6 @@ #include "mkldnn_memory_state.h" #include "mkldnn_itt.h" #include "nodes/mkldnn_memory_node.hpp" -#include -#include #include #include @@ -23,7 +20,8 @@ #include #include #include -#include +#include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -43,189 +41,17 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, extensionManager(extMgr), _cfg{cfg}, _name{network.getName()}, - _numaNodesWeights(numaNodesWeights) { - OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet"); - - // we are cloning network if we have statistics and we can transform network. - _clonedNetwork = cloneNetwork(network); - - bool isFloatModel = true; - if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) { - // Check if network is INT8 or Binary. - CNNNetworkIterator iter(network); - while (iter != CNNNetworkIterator()) { - if (CaselessEq()((*iter)->type, "FakeQuantize")) { - isFloatModel = false; - break; - } - iter++; - } - - auto changePrecisionBF16 = [&](Precision current, Precision target) { - InputsDataMap inputs = _clonedNetwork.getInputsInfo(); - OutputsDataMap outputs = _clonedNetwork.getOutputsInfo(); - CNNNetworkIterator iter(_clonedNetwork); - while (iter != CNNNetworkIterator()) { - // check, if memory output node needs to be transformed - if (current == Precision::FP32 && - (*iter)->type == "Memory" && (*iter)->outData.size() == 0 && - (*iter)->insData[0].lock()->getPrecision() == current) { - (*iter)->insData[0].lock()->setPrecision(target); - } - - for (size_t o = 0; o < (*iter)->outData.size(); o++) { - if (inputs.find((*iter)->outData[o]->getName()) == inputs.end() - && outputs.find((*iter)->outData[o]->getName()) == outputs.end() - && !CaselessEq()((*iter)->type, "const") - && (*iter)->outData[o]->getPrecision() == current) { - (*iter)->outData[o]->setPrecision(target); - } - } - iter++; - } - }; - - if (with_cpu_x86_avx512_core()) { - // If enforceBF16 flag was set, BF16 transformation applies for all layers supported by CPU plugin. - // Otherwise, only layers marked as BF16 in '_clonedNetwork' will be performed in bfloat16 mode. - // CPU plugin throws an exception, if marked as BF16 layers have not supported by CPU plugin. 
- - // BF16 + INT8 or BF16 + BIN models will be performed in mixed precision execution only if - // enforceBF16 flag was set manually - if (isFloatModel == false) { - if (cfg.manualEnforceBF16 == true) - changePrecisionBF16(Precision::FP32, Precision::BF16); - } else if (cfg.enforceBF16 == true) { - changePrecisionBF16(Precision::FP32, Precision::BF16); - } - } else { - changePrecisionBF16(Precision::BF16, Precision::FP32); - } + _numaNodesWeights(numaNodesWeights), + _network(network) { + auto function = network.getFunction(); + if (function == nullptr) { + IE_THROW() << "CPU plug-in doesn't support not ngraph-based model!"; } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "createConstInputs"); - auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, const std::vector& shape, const std::string& name) { - LayerParams attrs = {layer->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()}; - auto constLayer = std::make_shared(attrs); - constLayer->blobs["custom"] = blob; - - const TensorDesc& td = {blob->getTensorDesc().getPrecision(), shape, TensorDesc::getLayoutByDims(shape)}; - - DataPtr newEdgeAfterLayer(new Data(constLayer->name, td)); - newEdgeAfterLayer->setName(constLayer->name); - getCreatorLayer(newEdgeAfterLayer) = constLayer; - getInputTo(newEdgeAfterLayer).clear(); - - IE_SUPPRESS_DEPRECATED_START - auto icnnnet = static_cast(_clonedNetwork); - IE_SUPPRESS_DEPRECATED_END - auto implNetwork = std::dynamic_pointer_cast(icnnnet); - IE_ASSERT(implNetwork != nullptr); - implNetwork->addData(constLayer->name.c_str(), newEdgeAfterLayer); - implNetwork->addLayer(constLayer); - - constLayer->outData.push_back(newEdgeAfterLayer); - getInputTo(newEdgeAfterLayer)[layer->name] = layer; - layer->insData.push_back(newEdgeAfterLayer); - }; - - // The code block below transforms legacy layers to the form more compatible with opset1 in order to simplify future migration - // TODO: remove after plug-in is migrated on opset1 - auto all_layers = details::CNNNetSortTopologically(_clonedNetwork); - for (auto &layer : all_layers) { - if (layer->type == "ScaleShift" && layer->insData.size() == 1) { - auto constDimsRank = layer->insData[0].lock()->getDims().size(); - - Blob::Ptr scalesBlob = layer->blobs["weights"]; - if (scalesBlob != nullptr) { - std::vector shape(constDimsRank, 1); - shape[shape.size() > 1 ? 1 : 0] = scalesBlob->size(); - - createConstInputTo(layer, scalesBlob, shape, "weights"); - } - - Blob::Ptr shiftBlob = layer->blobs["biases"]; - if (shiftBlob != nullptr) { - std::vector shape(constDimsRank, 1); - shape[shape.size() > 1 ? 1 : 0] = shiftBlob->size(); - - createConstInputTo(layer, shiftBlob, shape, "biases"); - } else if (scalesBlob != nullptr) { - Blob::Ptr biases = make_shared_blob(scalesBlob->getTensorDesc()); - if (biases == nullptr) - IE_THROW() << "Cannot make 'biases' shared blob"; - biases->allocate(); - auto biasesPtr = biases->buffer().as(); - for (size_t i = 0; i < biases->size(); i++) - biasesPtr[i] = 0; - - std::vector shape(constDimsRank, 1); - shape[shape.size() > 1 ? 1 : 0] = biases->size(); - - createConstInputTo(layer, biases, shape, "biases"); - } - } else if (layer->type == "PReLU" && layer->insData.size() == 1) { - Blob::Ptr scalesBlob = layer->blobs["weights"]; - if (scalesBlob != nullptr) { - std::vector shape(layer->insData[0].lock()->getDims().size(), 1); - shape[shape.size() > 1 ? 
1 : 0] = scalesBlob->size(); - - createConstInputTo(layer, scalesBlob, shape, "weights"); - } - } else if (layer->type == "DeformableConvolution") { - auto * defConvLayer = dynamic_cast(layer.get()); - if (defConvLayer == nullptr) - IE_THROW() << "Cannot convert deformable convolution layer."; - - Blob::Ptr weightsBlob = defConvLayer->blobs["weights"]; - if (weightsBlob != nullptr) { - std::vector shape; - - if (defConvLayer->_group != 1) { - shape.push_back(defConvLayer->_group); - } - shape.push_back(defConvLayer->_out_depth); - shape.push_back(defConvLayer->input()->getDims()[1]); - for (int i = 1; i <= defConvLayer->_kernel.size(); i++) { - shape.push_back(defConvLayer->_kernel[defConvLayer->_kernel.size() - i]); - } - - createConstInputTo(layer, weightsBlob, shape, "weights"); - - defConvLayer->blobs.clear(); - defConvLayer->_weights = nullptr; - } - } else if (layer->type == "BinaryConvolution") { - auto * binConvLayer = dynamic_cast(layer.get()); - if (binConvLayer == nullptr) - IE_THROW() << "Cannot convert binary convolution layer."; - - Blob::Ptr weightsBlob = binConvLayer->blobs["weights"]; - if (weightsBlob != nullptr) { - std::vector shape; - - if (binConvLayer->_group != 1) { - shape.push_back(binConvLayer->_group); - } - shape.push_back(binConvLayer->_out_depth); - shape.push_back(binConvLayer->input()->getDims()[1]); - for (int i = 1; i <= binConvLayer->_kernel.size(); i++) { - shape.push_back(binConvLayer->_kernel[binConvLayer->_kernel.size() - i]); - } - - createConstInputTo(layer, weightsBlob, shape, "weights"); - - binConvLayer->blobs.clear(); - binConvLayer->_weights = nullptr; - } - } - } - - OV_ITT_TASK_SKIP(taskChain); + bool isFloatModel = !ngraph::op::util::has_op_with_type(function); if (_cfg.batchLimit > 1) { // check topology for applicability - if (!CanProcessDynBatch(_clonedNetwork)) { + if (!CanProcessDynBatch(_network)) { IE_THROW() << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!"; } } @@ -293,12 +119,11 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() { std::exception_ptr exception; auto makeGraph = [&] { try { - auto localNetwork = cloneNetwork(_clonedNetwork); { std::lock_guard lock{_cfgMutex}; graphLock._graph.setConfig(_cfg); } - graphLock._graph.CreateGraph(localNetwork, extensionManager, _numaNodesWeights[numaNodeId]); + graphLock._graph.CreateGraph(_network, extensionManager, _numaNodesWeights[numaNodeId]); } catch(...) 
{ exception = std::current_exception(); } @@ -386,53 +211,48 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name) bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const { InputsDataMap inputs = network.getInputsInfo(); - CNNLayerSet inputLayers; - std::unordered_set allLayers; - if (inputs.empty()) return false; - auto & secondLayers = getInputTo(inputs.begin()->second->getInputData()); - if (secondLayers.empty()) - return false; + auto function = network.getFunction(); + if (function == nullptr) { + IE_THROW() << "CPU plug-in doesn't support not ngraph-based model!"; + } - bool check_result = true; - details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) { - auto type = TypeFromName(layer->type); - // This is WA for Tile layer - auto tileLayer = dynamic_cast(layer.get()); - if (tileLayer && tileLayer->axis) - return; + auto ops = function->get_ordered_ops(); + for (auto op : ops) { + auto type = TypeFromName(op->get_type_name()); + if (type == Tile) { + const auto tile = std::dynamic_pointer_cast(op); + const auto repeatsNode = std::dynamic_pointer_cast(tile->get_input_node_shared_ptr(1)); + if (!repeatsNode) + return false; + if (tile && repeatsNode->cast_vector()[0] == 1) + continue; + } - auto reshapeLayer = dynamic_cast(layer.get()); - if (reshapeLayer && - type == Reshape && - (reshapeLayer->outData[0]->getTensorDesc().getDims()[0] == - reshapeLayer->insData[0].lock()->getTensorDesc().getDims()[0])) { - return; + if (type == Reshape) { + if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0]) + continue; } if (type != Input && type != Output && type != Convolution && type != Deconvolution && - type != Activation && - type != Depthwise && type != Lrn && type != Pooling && type != FullyConnected && - type != Gemm && - type != SoftMax && + type != MatMul && + type != Softmax && type != Split && type != Concatenation && - type != Eltwise && - type != BatchNormalization && - type != Copy) { - check_result = false; + type != Eltwise) { + return false; } - }, false); + } - return check_result; + return true; } IE_SUPPRESS_DEPRECATED_START diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h index 9696f73aad3..5c9b7edad27 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h @@ -14,7 +14,6 @@ #include #include #include -#include #include namespace MKLDNNPlugin { @@ -49,7 +48,7 @@ protected: friend class MKLDNNInferRequest; MKLDNNExtensionManager::Ptr extensionManager; std::vector memoryStates; - InferenceEngine::CNNNetwork _clonedNetwork; + const InferenceEngine::CNNNetwork _network; std::mutex _cfgMutex; Config _cfg; std::atomic_int _numRequests = {0}; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp index fac29d5e22e..deb3fdff7d6 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp @@ -31,17 +31,14 @@ InferenceEngine::ILayerImpl::Ptr MKLDNNExtensionManager::CreateImplementation(co return nullptr; } -std::shared_ptr MKLDNNExtensionManager::CreateExtensionFactory( - const InferenceEngine::CNNLayerPtr &layer) { - if (!layer) - IE_THROW() << "Cannot get cnn layer!"; +std::shared_ptr MKLDNNExtensionManager::CreateExtensionFactory(const std::shared_ptr& op) { 
std::shared_ptr factory; for (auto& ext : _extensions) { ResponseDesc responseDesc; StatusCode rc = GENERAL_ERROR; ILayerImplFactory* factory_ptr = nullptr; if (auto mkldnnExt = std::dynamic_pointer_cast(ext)) - rc = mkldnnExt->getFactoryFor(factory_ptr, layer.get(), &responseDesc); + rc = mkldnnExt->getFactoryFor(factory_ptr, op, &responseDesc); if (rc != OK) { factory = nullptr; continue; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h index e205993792e..83ddfc3ffe1 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h @@ -8,7 +8,6 @@ #include #include #include -#include #include "nodes/list.hpp" namespace MKLDNNPlugin { @@ -18,7 +17,7 @@ public: using Ptr = std::shared_ptr; MKLDNNExtensionManager() = default; InferenceEngine::ILayerImpl::Ptr CreateImplementation(const std::shared_ptr& op); - std::shared_ptr CreateExtensionFactory(const InferenceEngine::CNNLayerPtr& Layer); + std::shared_ptr CreateExtensionFactory(const std::shared_ptr& op); void AddExtension(InferenceEngine::IExtensionPtr extension); private: diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index b48f6a6f8ba..75db9a073e3 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -26,11 +26,9 @@ #include #include -#include #include #include -#include -#include +#include "nodes/common/cpu_memcpy.h" #include "nodes/common/cpu_convert.h" #include "precision_utils.h" @@ -39,6 +37,14 @@ #include "utils/general_utils.h" #include "utils/debug_capabilities.h" #include "utils/node_dumper.h" +#include "utils/ngraph_utils.hpp" +#include "utils/cpu_utils.hpp" + +#include +#include +#include +#include +#include /***************************************************** * Debug capability @@ -60,31 +66,7 @@ typedef std::vector edge_clusters_t; mkldnn::engine MKLDNNGraph::eng(mkldnn::engine::kind::cpu, 0); template -void MKLDNNGraph::ApplyUnrollPasses(NET &net) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::ApplyUnrollPasses"); - - NetPass::CombineRNNSeq(net); - bool ti_proc_ok = NetPass::UnrollRNN_if(net, [] (const RNNCellBase &rnn) -> bool { - if (rnn.clip != 0.0f) - return true; - if ((rnn.cellType == RNNCellBase::GRU || rnn.cellType == RNNCellBase::GRU_LBR) && - rnn.activations != std::vector {"sigmoid", "tanh"}) - return true; - if (rnn.cellType == RNNCellBase::LSTM && - rnn.activations != std::vector {"sigmoid", "tanh", "tanh"}) - return true; - return false; - }); - if (!ti_proc_ok) - IE_THROW() << "Plugin doesn't support Tensor Iterator in pure form. 
" - "None TI optimization pattern has been applied successfully"; -} - -template void MKLDNNGraph::ApplyUnrollPasses(TensorIterator::Body&); -template void MKLDNNGraph::ApplyUnrollPasses(CNNNetwork&); - -template -void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& extMgr, +void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) { OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph"); @@ -98,233 +80,252 @@ void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& status = Ready; } -template void MKLDNNGraph::CreateGraph(const TensorIterator::Body&, +template void MKLDNNGraph::CreateGraph(const std::shared_ptr&, const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); template void MKLDNNGraph::CreateGraph(const CNNNetwork&, const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); -void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { +void MKLDNNGraph::Replicate(const std::shared_ptr &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { this->_name = "subgraph"; this->reuse_io_tensors = false; - // Map data object onto producer layer(node) - std::unordered_map> data2node; + // Map data object onto producer node + std::map, std::pair> op2node; // nodes which has no consumers (output or just unused). But doesn't marked as graph output. // Will be stored as fake output separately. - std::unordered_set unused_data; + std::deque> unusedOutputs; - // Step 1. Replicate input nodes - for (const auto &input : subgraph.inputs) { - if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder - - auto creator = getCreatorLayer(input).lock(); - if (creator == nullptr) { - creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()})); - creator->outData.push_back(input); + auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, + const size_t childInputPort) -> int { + for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) { + if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) { + return static_cast(parentPort); + } } - const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache)); - data2node[input.get()] = {node, 0}; + return -1; + }; + for (const auto op : subgraph->get_ordered_ops()) { + const MKLDNNNodePtr node {MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)}; graphNodes.push_back(node); - inputNodes[input->getName()] = node; - if (getInputTo(input).empty()) { - unused_data.insert(input); + if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) { + inputNodesMap[node->getName()] = node; } - } - // Step 2. Replicate all internal nodes. - for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) { - const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; - graphNodes.push_back(node); + if (op->get_type_info() == ngraph::op::v0::Result::type_info) { + auto prev = op->get_input_node_shared_ptr(0); + std::string inputID; + inputID = prev->get_friendly_name(); + if (prev->get_output_size() > 1) { + inputID += "." 
+ std::to_string(op->get_input_source_output(0).get_index()); + } - for (int port = 0; port < layer->insData.size(); port++) { - auto data = layer->insData[port].lock(); + outputNodesMap[inputID] = node; + } - auto port_info = data2node[data.get()]; - auto parent_node = port_info.first; - auto parent_port_idx = port_info.second; + for (size_t port = 0; port < op->get_input_size(); port++) { + auto parentOp = op->get_input_node_shared_ptr(port); - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port)); + auto portInfo = op2node[parentOp]; + auto parentNode = portInfo.first; + + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), port)); node->addEdge(edge); graphEdges.push_back(edge); } - int out_port_idx = 0; - for (auto &out_data : layer->outData) { - data2node[out_data.get()] = {node, out_port_idx++}; - if (getInputTo(out_data).empty()) { - unused_data.insert(out_data); + + if (!MKLDNNPlugin::one_of(op->get_type_info(), + ngraph::op::v0::Result::type_info, + ngraph::op::v3::Assign::type_info, + ngraph::op::v6::Assign::type_info)) { + int outPortIdx = 0; + for (int oi = 0; oi < op->get_output_size(); oi++) { + op2node[op->output(oi).get_node_shared_ptr()] = {node, outPortIdx++}; + if (op->get_output_target_inputs(oi).empty()) { + unusedOutputs.push_back(op->output(oi)); + } } } } - // Step 3. Add output nodes and output stubs for unused data objects. - for (const auto &output : subgraph.outputs) { - auto port_info = data2node[output.get()]; - auto parent_node = port_info.first; - auto parent_port_idx = port_info.second; - - CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()})); - layer->insData.push_back(output); - - const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; - - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); - node->addEdge(edge); - graphEdges.push_back(edge); - graphNodes.push_back(node); - outputNodes.push_back(node); - - unused_data.erase(output); - } - // Add stub output node for unused data - for (auto to_stub_data : unused_data) { - auto port_info = data2node[to_stub_data.get()]; - auto parent_node = port_info.first; - auto parent_port_idx = port_info.second; - - CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()})); - layer->insData.push_back(to_stub_data); - - const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); - - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); - node->addEdge(edge); + for (auto unusedOutput : unusedOutputs) { + auto portInfo = op2node[unusedOutput.get_node_shared_ptr()]; + auto parentNode = portInfo.first; + auto port = portInfo.second; + const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + parentNode->getOriginalOutputPrecisionAtPort(port), + nodeName, "Result", getEngine(), weightsCache); + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); + outNode->addEdge(edge); graphEdges.push_back(edge); - graphNodes.push_back(node); + graphNodes.push_back(outNode); } } void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, 
"MKLDNNGraph::Replicate", "CNNNetwork"); - InputsDataMap inputs = network.getInputsInfo(); + + InputsDataMap inputsInfo = network.getInputsInfo(); + OutputsDataMap outputsInfo = network.getOutputsInfo(); this->_name = network.getName(); - // The input layer precision has to be equal to the InputData precision - std::map changedPrecision; - for (const auto& input : inputs) { - auto inputLayer = getCreatorLayer(input.second->getInputData()).lock(); - if (inputLayer) { - inputLayer->precision = inputLayer->outData[0]->getTensorDesc().getPrecision(); - } + std::shared_ptr func = network.getFunction(); + if (!func) { + IE_THROW() << "Function pointer inside CNNNetwork is nullptr"; } - std::unordered_map layer2node; - std::unordered_set unused_data; // nodes which has no consumers (output or just unused) + auto orderedOps = func->get_ordered_ops(); + + // TODO [NM]: unordered_map is preferred from performance perspective. Needs hash for ngraph::Node + std::map, MKLDNNNodePtr> op2node; + std::deque> unusedOutputs; // nodes which has no consumers (output or just unused) + + auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, + const size_t childInputPort) -> int { + for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) { + if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) { + return static_cast(parentPort); + } + } - auto _parent_port = [] (const DataPtr &data) -> int { - auto parent = getCreatorLayer(data).lock(); - for (int i = 0; parent->outData.size(); i++) - if (data == parent->outData[i]) - return i; return -1; }; OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AllNodes"); // Replicate All Nodes in topological order - for (const auto layer : CNNNetSortTopologically(network)) { - CNNLayerPtr _layer = layer; - if (layer->type == "Memory" && layer->GetParamAsString("index") == "1") { - auto memoryId = layer->GetParamAsString("id"); - Precision portPrecision = layer->outData[0]->getTensorDesc().getPrecision(); - _layer.reset(new CNNLayer({layer->name + "/id=" + memoryId, "MemoryInput", portPrecision})); - _layer->params = layer->params; - _layer->outData = layer->outData; - } - - const MKLDNNNodePtr node(MKLDNNNode::factory().create(_layer, getEngine(), extMgr, weightsCache)); + for (const auto& op : orderedOps) { + const MKLDNNNodePtr node(MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)); graphNodes.push_back(node); - layer2node[layer] = node; - if (layer->params.count("originalLayersNames")) { - node->originalLayers = layer->params["originalLayersNames"]; + if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) { + if (inputsInfo.count(node->getName()) != 0) { + inputNodesMap[node->getName()] = node; + } } - for (int port = 0; port < layer->insData.size(); port++) { - auto data = layer->insData[port].lock(); - auto parent_layer = getCreatorLayer(data).lock(); - if (!parent_layer) continue; // no parent means that it is input data node (or memory/const layer) + if (op->get_type_info() == ngraph::op::v0::Result::type_info) { + // [NM] TODO: Several network has model outputs which mismatch with result node name + const auto &input = op->input_value(0); + NGRAPH_SUPPRESS_DEPRECATED_START + auto name = input.get_tensor().get_name(); + NGRAPH_SUPPRESS_DEPRECATED_END + if (name.empty()) { + name = ngraph::op::util::create_ie_output_name(input); + } - auto parent_node = layer2node[parent_layer]; + if (outputsInfo.count(name) != 0) { + 
outputNodesMap[name] = node; + } + } - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), port)); + op2node[op] = node; + + for (size_t port = 0; port < op->get_input_size(); port++) { + auto parentOp = op->get_input_node_shared_ptr(port); + auto parentNode = op2node[parentOp]; + + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast(port))); node->addEdge(edge); graphEdges.push_back(edge); } - for (auto &out_data : layer->outData) { - if (getInputTo(out_data).empty()) { - unused_data.insert(out_data); + + if (!MKLDNNPlugin::one_of(op->get_type_info(), + ngraph::op::v0::Result::type_info, + ngraph::op::v3::Assign::type_info, + ngraph::op::v6::Assign::type_info)) { + for (int oi = 0; oi < op->get_output_size(); oi++) { + if (op->get_output_target_inputs(oi).empty()) { + unusedOutputs.push_back(op->output(oi)); + } } } } - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Outputs"); - - OutputsDataMap outputs = network.getOutputsInfo(); - for (const auto &output : outputs) { - const auto data = output.second; - - auto parent_layer = getCreatorLayer(data).lock(); - auto parent_node = layer2node[parent_layer]; - - CNNLayerPtr layer(new CNNLayer({"out_" + output.first, "Output", data->getTensorDesc().getPrecision()})); - layer->insData.push_back(data); - - const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); - - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), 0)); - node->addEdge(edge); + // Add stub output node for unused outputs + for (auto unusedOutput : unusedOutputs) { + auto parentNode = op2node[unusedOutput.get_node_shared_ptr()]; + const auto port = unusedOutput.get_index(); + const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + parentNode->getOriginalOutputPrecisionAtPort(port), + nodeName, "Result", getEngine(), weightsCache); + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); + outNode->addEdge(edge); graphEdges.push_back(edge); - - graphNodes.push_back(node); - outputNodes.push_back(node); - - unused_data.erase(data); + graphNodes.push_back(outNode); } - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "AddStubs"); + // We set all non const data paths precision to BF16 in case enforceBF16 flag is switched on. 
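The enforceBF16 block that follows gates the precision change on implication(isQuantizedModel, config.manualEnforceBF16). Logical implication a -> b is simply !a || b, so the condition reads: either the model has no FakeQuantize nodes, or BF16 was enforced manually. A compile-time restatement for clarity; the plugin's own helper is expected to live in its general utilities, and this standalone copy is illustrative only.

// Minimal restatement of the logical implication used by the BF16 gating below.
constexpr bool implication(bool cause, bool cond) {
    return !cause || cond;
}

// Truth-table check: quantized models switch to BF16 only with the manual flag.
static_assert(implication(false, false), "float model, no manual flag: gate passes");
static_assert(implication(true, true), "quantized model with manual flag: gate passes");
static_assert(!implication(true, false), "quantized model without manual flag: gate blocks");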
+ if (config.enforceBF16) { + bool isQuantizedModel = false; + for (auto& node : graphNodes) { + if (node->getType() == FakeQuantize) + isQuantizedModel = true; + } - // Add stub output node for unused data - for (auto to_stub_data : unused_data) { - auto parent_layer = getCreatorLayer(to_stub_data).lock(); - auto parent_node = layer2node[parent_layer]; + // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision + // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default + if (implication(isQuantizedModel, config.manualEnforceBF16)) { + for (auto &node : graphNodes) { + if (node->getType() != Input && node->getType() != Output) { + for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) { + auto &parent = node->getParentEdgesAtPort(i)[0]->getParent(); + if (!(parent->getType() == Input && parent->isConstant()) && node->getOriginalInputPrecisionAtPort(i) == Precision::FP32) + node->setOriginalInputPrecisionAtPort(i, Precision::BF16); + } - CNNLayerPtr layer(new CNNLayer({"stub_" + parent_layer->name, "Output", to_stub_data->getTensorDesc().getPrecision()})); - layer->insData.push_back(to_stub_data); - - const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); - - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(to_stub_data), 0)); - node->addEdge(edge); - graphEdges.push_back(edge); - graphNodes.push_back(node); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "Inputs"); - - // Replicate input nodes - for (const auto& input : inputs) { - auto inputLayer = getCreatorLayer(input.second->getInputData()).lock(); - inputNodes[input.first] = layer2node[inputLayer]; - - // Loading mean images - MKLDNNDims outDims; - if (!inputNodes[input.first]->getChildEdgeAt(0)->getDims().ndims()) - outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1)); - else - outDims = MKLDNNDims(inputNodes[input.first]->getChildEdgeAt(0)->getDims()); - if (inputs.find(input.first) != inputs.end()) { - InputInfo::Ptr ii = inputs[input.first]; - if (ii && ii->getPreProcess().getNumberOfChannels()) { - _meanImages[input.first].Load(outDims, ii); + for (size_t i = 0; i < node->getOriginalOutputsNumber(); i++) { + if (node->getOriginalOutputPrecisionAtPort(i) == Precision::FP32) + node->setOriginalOutputPrecisionAtPort(i, Precision::BF16); + } + } } } } + + // change precision for input/output nodes to avoid extra data conversion when set input/output blobs + // also we need to change input/output precisions for consumers/producers to avoid inserting reorder + for (auto &input : inputNodesMap) { + const auto precToSet = normalizeToSupportedPrecision(inputsInfo.at(input.first)->getPrecision()); + input.second->setOriginalOutputPrecisionAtPort(0, precToSet); + const auto childEdges = input.second->getChildEdgesAtPort(0); + for (size_t i = 0; i < childEdges.size(); i++) { + const auto child = childEdges[i]->getChild(); + if (child->getOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum()) != Precision::BF16) + child->setOriginalInputPrecisionAtPort(childEdges[i]->getOutputNum(), precToSet); + } + } + + for (auto &output : outputNodesMap) { + const auto precToSet = normalizeToSupportedPrecision(outputsInfo.at(output.first)->getPrecision()); + output.second->setOriginalInputPrecisionAtPort(0, precToSet); + const auto parentEdges = output.second->getParentEdgesAtPort(0); + for (size_t i = 0; i < parentEdges.size(); i++) { + const auto parent = 
parentEdges[i]->getParent(); + parent->setOriginalOutputPrecisionAtPort(parentEdges[i]->getInputNum(), precToSet); + } + } + + // Loading mean images + for (const auto& input : inputsInfo) { + MKLDNNDims outDims; + if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims()) { + outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1)); + } else { + outDims = inputNodesMap[input.first]->getChildEdgeAt(0)->getDims(); + } + InputInfo::Ptr ii = inputsInfo[input.first]; + if (ii && ii->getPreProcess().getNumberOfChannels()) { + _meanImages[input.first].Load(outDims, ii); + } + } } void MKLDNNGraph::InitGraph() { @@ -349,11 +350,6 @@ void MKLDNNGraph::InitGraph() { CreatePrimitives(); - SetOriginalLayerNames(); - - if (!config.dumpToDot.empty()) - dumpToDotFile(config.dumpToDot + "_init.dot"); - #ifndef CPU_DEBUG_CAPS for (auto &graphNode : graphNodes) { graphNode->cleanup(); @@ -366,31 +362,6 @@ void MKLDNNGraph::InitGraph() { ExecuteConstantNodesOnly(); } -void MKLDNNGraph::SetOriginalLayerNames() { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames"); - - // Do it before cleanup. Because it will lose original layers information - for (auto &graphNode : graphNodes) { - auto nodeType = graphNode->getType(); - if (nodeType == Reorder || nodeType == Output) continue; - - if (graphNode->getOriginalLayers().empty()) { - graphNode->addOriginalLayer(graphNode->getCnnLayer()); - } - - if (graphNode->getFusedWith().size() || graphNode->getMergeWith().size()) { - // Original layer names - std::vector internal = graphNode->getFusedWith(); - auto &merged = graphNode->getMergeWith(); - internal.insert(internal.end(), merged.begin(), merged.end()); - - for (auto &sub_node : internal) { - graphNode->addOriginalLayer(sub_node->getCnnLayer()); - } - } - } -} - void MKLDNNGraph::InitNodes() { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes"); for (auto &node : graphNodes) { @@ -500,7 +471,7 @@ void MKLDNNGraph::InitEdges() { std::unordered_set uniqueLayerNames; for (auto node : graphNodes) { - uniqueLayerNames.insert(node->getCnnLayer()->name); + uniqueLayerNames.insert(node->getName()); } for (auto i = 0; i < numberOfEdges; i++) { @@ -510,14 +481,17 @@ void MKLDNNGraph::InitEdges() { // Check if there is a reorder that supports the type conversion if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && - !isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) { - //If we are here, then we need to insert Convert, because there are no reorders that support such type conversion - std::string convertName = edge->getParent()->getName() + "_" + - edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name(); + !isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) { + // If we are here, then we need to insert Convert, because there are no reorders that support such type conversion + const auto inDesc = edge->getInputDesc(); + const auto outDesc = edge->getOutputDesc(); - CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()})); - auto convertNode = std::make_shared(convert, this->getEngine(), this->weightsCache); - convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc()); + std::string convertName = edge->getParent()->getName() + "_" + + inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name(); + + auto convertNode = 
std::make_shared(inDesc.getDims(), inDesc.getPrecision(), outDesc.getPrecision(), convertName, + this->getEngine(), this->weightsCache); + convertNode->setDescs(inDesc, outDesc); InsertNode(edge, convertNode, true); //Check if reorder is still needed @@ -741,8 +715,8 @@ void MKLDNNGraph::CreatePrimitives() { void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) { if (!IsReady()) IE_THROW()<< "Wrong state. Topology not ready."; - auto input = inputNodes.find(name); - if (input != inputNodes.end()) { + auto input = inputNodesMap.find(name); + if (input != inputNodesMap.end()) { MKLDNNDims outDims = input->second->getChildEdgeAt(0)->getDims(); const void *ext_data_ptr = in->cbuffer(); @@ -774,11 +748,12 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { if (!IsReady()) IE_THROW() << "Wrong state. Topology not ready."; - for (MKLDNNNodePtr &node : outputNodes) { - // remove out_ from node name - std::string name = node->getName().substr(4); + for (auto &outputMap : outputNodesMap) { + auto name = outputMap.first; + auto node = outputMap.second; const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory(); if (out.find(name) == out.end()) { + // TODO [NM]: Do we really need this path? // TODO: Create blob from MemoryDesc Blob::Ptr outBlob = make_shared_blob({Precision::FP32, node->getParentEdgeAt(0)->getDims().ToSizeVector(), TensorDesc::getLayoutByDims(node->getParentEdgeAt(0)->getDims().ToSizeVector())}, @@ -816,7 +791,29 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { MB_to_process = std::min(config.batchLimit, MB_to_process); size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB; - cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy); + const auto actualDesc = node->getParentEdgeAt(0)->getDesc(); + const auto expectedDesc = ext_blob->getTensorDesc(); + + // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it + // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar + bool isScalarOutput = false; + if (actualDesc.getLayout() == SCALAR) { + isScalarOutput = expectedDesc.getLayout() == SCALAR || + std::accumulate(expectedDesc.getDims().begin(), expectedDesc.getDims().end(), (size_t)1, std::multiplies()) == 1; + } else if (expectedDesc.getLayout() == SCALAR) { + isScalarOutput = actualDesc.getLayout() == SCALAR || + std::accumulate(actualDesc.getDims().begin(), actualDesc.getDims().end(), (size_t)1, std::multiplies()) == 1; + } + + if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) { + auto outBlobDesc = MKLDNNMemoryDesc{expectedDesc}; + auto outBloMem = MKLDNNMemory(eng); + outBloMem.Create(outBlobDesc, ext_blob_ptr, false); + + outBloMem.SetData(intr_blob, 0, false); + } else { + cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy); + } } } @@ -966,8 +963,6 @@ void MKLDNNGraph::GetPerfData(std::map(it.second.get()); - if (!node || node->isConstant()) - continue; - resp[it.first] = node->getChildEdgeAt(0)->getBlob(); + for (auto &it : inputNodesMap) { + resp[it.first] = it.second->getChildEdgeAt(0)->getBlob(); } } void MKLDNNGraph::getOutputBlobs(InferenceEngine::BlobMap &resp) { - for (auto &it : outputNodes) { - std::string name = it->getName().substr(4); - resp[name] = it->getParentEdgeAt(0)->getBlob(); + for (auto &it : outputNodesMap) { + resp[it.first] = it.second->getParentEdgeAt(0)->getBlob(); } } @@ -1150,10 +1141,7 @@ void 
MKLDNNGraph::RemoveDroppedEdges() { MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc, bool isOptimized, InferenceEngine::Blob::Ptr scales) { - CNNLayerPtr layer(new CNNLayer({layerName, - "Reorder", - inDesc.getPrecision()})); - MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache)); + MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layerName, getEngine(), weightsCache)); auto *reorderPtr = dynamic_cast(newReorder.get()); if (reorderPtr == nullptr) { IE_THROW() << "MKLDNNGraph::InsertReorder: Cannot cast to MKLDNNReorderNode"; @@ -1165,7 +1153,7 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa InsertNode(edge, newReorder, true); // Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal. - // Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that isOptimized flag uses, we shouldn't do these checks. + // Due to the specificity of MKLDNNGraphOptimizer::MergeTransposeAndReorder() that isOptimized flag uses, we shouldn't do these checks. if (!isOptimized) { newReorder->getParentEdgeAt(0)->getDesc(); newReorder->getChildEdgeAt(0)->getDesc(); @@ -1218,15 +1206,6 @@ InferenceEngine::CNNNetwork MKLDNNGraph::dump() const { return dump_graph_as_ie_ngraph_net(*this); } -void MKLDNNGraph::dumpToDotFile(std::string file) const { - std::ofstream dot; - dot.open(file); - if (!dot.is_open()) IE_THROW() << "CPU Plugin cannot create dot file " << file << "."; - - dump_graph_as_dot(*this, dot); - dot.close(); -} - void MKLDNNGraph::printGraphInfo() const { for (auto &graphNode : graphNodes) { std::cout << "name: " << graphNode->getName() << " [ "; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index ae2532dbee5..4a82f9c26b0 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -4,14 +4,12 @@ #pragma once -#include "ie_parallel.hpp" #include "cpp/ie_cnn_network.h" #include "config.h" #include "mkldnn_memory.h" #include "mean_image.h" #include "mkldnn_node.h" #include "mkldnn_edge.h" -#include "threading/ie_thread_local.hpp" #include #include #include @@ -48,7 +46,7 @@ public: void getOutputBlobs(InferenceEngine::BlobMap &out_map); template - void CreateGraph(const NET &network, + void CreateGraph(NET &network, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache); @@ -73,15 +71,14 @@ public: return graphEdges; } - std::vector& GetOutputNodes() { - return outputNodes; + std::map& GetInputNodesMap() { + return inputNodesMap; } - std::map& GetInputNodes() { - return inputNodes; + std::map& GetOutputNodesMap() { + return outputNodesMap; } - mkldnn::engine getEngine() const { return eng; } @@ -152,9 +149,6 @@ public: InferenceEngine::CNNNetwork dump() const; - template - static void ApplyUnrollPasses(NET &net); - void ResetInferCount() { infer_count = 0; } void SortTopologically(); @@ -166,8 +160,8 @@ protected: status = NotReady; eng = mkldnn::engine(mkldnn::engine::kind::cpu, 0); - inputNodes.clear(); - outputNodes.clear(); + inputNodesMap.clear(); + outputNodesMap.clear(); graphNodes.clear(); graphEdges.clear(); _meanImages.clear(); @@ -183,8 +177,8 @@ protected: MKLDNNMemoryPtr memWorkspace; - std::map inputNodes; - std::vector outputNodes; + std::map inputNodesMap; + std::map outputNodesMap; std::vector graphNodes; std::vector 
graphEdges; @@ -194,7 +188,7 @@ protected: static mkldnn::engine eng; void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr); - void Replicate(const InferenceEngine::TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr); + void Replicate(const std::shared_ptr &subgraph, const MKLDNNExtensionManager::Ptr& extMgr); void InitGraph(); void InitNodes(); void InitDescriptors(); @@ -204,22 +198,13 @@ protected: void AllocateWithReuse(); void CreatePrimitives(); void ExecuteConstantNodesOnly(); - void SetOriginalLayerNames(); friend class MKLDNNInferRequest; friend class MKLDNNGraphlessInferRequest; - friend InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph); friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); private: - void dumpToDotFile(std::string file) const; void printGraphInfo() const; - - struct ParsedLayer { - MKLDNNNodePtr parent; - InferenceEngine::CNNLayerPtr cnnLayer; - size_t outIdx; - }; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index c22512cd972..14d2f6a28ae 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -3,8 +3,6 @@ // #include "mkldnn_graph_dumper.h" -#include -#include #include #include "exec_graph_info.hpp" #include "mkldnn_debug.h" @@ -22,188 +20,6 @@ namespace MKLDNNPlugin { namespace { -std::map extract_node_metadata(const MKLDNNNodePtr &); -void drawer_callback(const InferenceEngine::CNNLayerPtr, ordered_properties &, ordered_properties &); - -} // namespace - -CNNLayer::Ptr create_cnnlayer(const MKLDNNNodePtr &node) { - CNNLayer::Ptr layer(new CNNLayer({node->getName(), "type", Precision::FP32})); - - layer->params = extract_node_metadata(node); - layer->type = layer->params[ExecGraphInfoSerialization::LAYER_TYPE]; - layer->params.erase(ExecGraphInfoSerialization::LAYER_TYPE); - - auto &cfg = node->getSelectedPrimitiveDescriptor()->getConfig(); - layer->insData.resize(cfg.inConfs.size()); - layer->outData.resize(cfg.outConfs.size()); - - return layer; -} - -InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph) { - std::map > node2layer; - - ngraph::ResultVector results; - ngraph::ParameterVector params; - ngraph::NodeVector to_hold; - - auto get_inputs = [&] (const MKLDNNNodePtr & node) { - auto pr_edges = node->getParentEdges(); - ngraph::OutputVector inputs(pr_edges.size()); - - for (int i = 0; i < pr_edges.size(); i++) { - auto edge = node->getParentEdgeAt(i); - int pr_port = edge->getInputNum(); - int ch_port = edge->getOutputNum(); - auto pr_node = edge->getParent(); - - IE_ASSERT(node2layer.count(pr_node) == 1); - auto pr = node2layer[pr_node]; - - inputs[ch_port] = pr->output(pr_port); - } - - return inputs; - }; - - auto create_ngraph_node = [&](const MKLDNNNodePtr &node) { - bool is_input = false, is_output = false, should_be_hold = false; - for (auto && kvp : graph.inputNodes) { - if (kvp.second == node) { - is_input = true; - break; - } - } - - for (auto && onode : graph.outputNodes) { - if (onode == node) { - is_output = true; - break; - } - } - - if (!is_output && node->getChildEdges().empty()) { - // The node has no consumer and is not an output. - // Should be hold in other irregular way. 
- should_be_hold = true; - } - - auto meta_data = extract_node_metadata(node); - std::shared_ptr return_node; - if (is_input) { - auto desc = node->getChildEdgeAt(0)->getDesc(); - auto param = std::make_shared( - details::convertPrecision(desc.getPrecision()), - ngraph::PartialShape(desc.getDims())); - return_node = param; - params.push_back(param); - } else if (is_output) { - results.emplace_back(std::make_shared(get_inputs(node).back())); - return_node = results.back(); - } else { - return_node = std::make_shared( - get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()); - - for (size_t port = 0; port < return_node->get_output_size(); ++port) { - auto desc = node->getChildEdgeAt(port)->getDesc(); - return_node->set_output_type(port, - details::convertPrecision(desc.getPrecision()), - ngraph::PartialShape(desc.getDims())); - } - } - - if (should_be_hold) { - to_hold.push_back(return_node); - } - - for (auto && kvp : meta_data) - return_node->get_rt_info()[kvp.first] = std::make_shared<::ngraph::VariantWrapper>(kvp.second); - return_node->set_friendly_name(node->getName()); - - return return_node; - }; - - ngraph::NodeVector nodes; - nodes.reserve(graph.graphNodes.size()); - for (auto &node : graph.graphNodes) { // important: graph.graphNodes are in topological order - nodes.emplace_back(create_ngraph_node(node)); - node2layer[node] = nodes.back(); - } - - auto holder = results[0]; - for (auto &node : to_hold) { - holder->add_control_dependency(node); - } - - auto function = std::make_shared(results, params, graph._name); - InferenceEngine::CNNNetwork net(function); - return net; -} - -InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph) { - auto net = std::make_shared(); - - net->setName(graph._name); - std::map node2layer; - - // Copy all nodes to network - for (auto &node : graph.graphNodes) { - auto layer = create_cnnlayer(node); - node2layer[node] = layer; - net->addLayer(layer); - } - - // Copy all edges to network - for (auto &node : graph.graphNodes) { - auto pr = node2layer[node]; - auto ch_edges = node->getChildEdges(); - - for (int i = 0; i < ch_edges.size(); i++) { - auto edge = node->getChildEdgeAt(i); - int in_port = edge->getOutputNum(); - auto ch_node = edge->getChild(); - auto ch = node2layer[ch_node]; - - DataPtr data; - if (i < pr->outData.size()) { - std::string data_name = node->getName() + "_out" + std::to_string(i); - pr->outData[i] = std::make_shared(data_name, edge->getDesc()); - data = pr->outData[i]; - getCreatorLayer(data) = pr; - } else { - data = pr->outData[0]; - } - - getInputTo(data)[ch->name] = ch; - ch->insData[in_port] = data; - } - } - - // Specify inputs data - for (auto kvp : graph.inputNodes) { - auto in_node = kvp.second; - auto in_layer = node2layer[in_node]; - - auto in_info = std::make_shared(); - in_info->setInputData(in_layer->outData[0]); - net->setInputInfo(in_info); - } - - return InferenceEngine::CNNNetwork{net}; -} - -void dump_graph_as_dot(const MKLDNNGraph &graph, std::ostream &out) { - InferenceEngine::CNNNetwork dump_net = dump_graph_as_ie_net(graph); - InferenceEngine::saveGraphToDot(dump_net, out, drawer_callback); -} - -//********************************** -// Special converters of meta data -//********************************** - -namespace { - std::map extract_node_metadata(const MKLDNNNodePtr &node) { std::map serialization_info; @@ -289,39 +105,106 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no return serialization_info; } -const char BLUE[] = "#D8D9F1"; -const 
char GREEN[] = "#D9EAD3"; - -void drawer_callback(const InferenceEngine::CNNLayerPtr layer, - ordered_properties &printed_properties, - ordered_properties &node_properties) { - const auto ¶ms = layer->params; - - // Implementation - auto impl = params.find(ExecGraphInfoSerialization::IMPL_TYPE); - if (impl != params.end()) { - printed_properties.push_back({"impl", impl->second}); - } - - // Original names - auto orig = params.find(ExecGraphInfoSerialization::ORIGINAL_NAMES); - if (orig != params.end()) { - printed_properties.push_back({"originals", orig->second}); - } - - // Precision - auto prec = params.find(ExecGraphInfoSerialization::OUTPUT_PRECISIONS); - if (prec != params.end()) { - printed_properties.push_back({"precision", prec->second}); - // Set color - node_properties.push_back({"fillcolor", prec->second == "FP32" ? GREEN : BLUE}); - } - - // Set xlabel containing PM data if calculated - auto perf = layer->params.find(ExecGraphInfoSerialization::PERF_COUNTER); - node_properties.push_back({"xlabel", (perf != layer->params.end()) ? perf->second : ""}); -} - } // namespace +InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph) { + std::map > node2layer; + + ngraph::ResultVector results; + ngraph::ParameterVector params; + ngraph::NodeVector to_hold; + + auto get_inputs = [&] (const MKLDNNNodePtr & node) { + auto pr_edges = node->getParentEdges(); + ngraph::OutputVector inputs(pr_edges.size()); + + for (int i = 0; i < pr_edges.size(); i++) { + auto edge = node->getParentEdgeAt(i); + int pr_port = edge->getInputNum(); + int ch_port = edge->getOutputNum(); + auto pr_node = edge->getParent(); + + IE_ASSERT(node2layer.count(pr_node) == 1); + auto pr = node2layer[pr_node]; + + inputs[ch_port] = pr->output(pr_port); + } + + return inputs; + }; + + auto create_ngraph_node = [&](const MKLDNNNodePtr &node) { + bool is_input = false, is_output = false, should_be_hold = false; + for (auto && kvp : graph.inputNodesMap) { + if (kvp.second == node) { + is_input = true; + break; + } + } + + for (auto && kvp : graph.outputNodesMap) { + if (kvp.second == node) { + is_output = true; + break; + } + } + + if (!is_output && node->getChildEdges().empty()) { + // The node has no consumer and is not an output. + // Should be hold in other irregular way. 
+ should_be_hold = true; + } + + auto meta_data = extract_node_metadata(node); + std::shared_ptr return_node; + if (is_input) { + auto desc = node->getChildEdgeAt(0)->getDesc(); + auto param = std::make_shared( + details::convertPrecision(desc.getPrecision()), + ngraph::PartialShape(desc.getDims())); + return_node = param; + params.push_back(param); + } else if (is_output) { + results.emplace_back(std::make_shared(get_inputs(node).back())); + return_node = results.back(); + } else { + return_node = std::make_shared( + get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()); + + for (size_t port = 0; port < return_node->get_output_size(); ++port) { + auto desc = node->getChildEdgeAt(port)->getDesc(); + return_node->set_output_type(port, + details::convertPrecision(desc.getPrecision()), + ngraph::PartialShape(desc.getDims())); + } + } + + if (should_be_hold) { + to_hold.push_back(return_node); + } + + for (auto && kvp : meta_data) + return_node->get_rt_info()[kvp.first] = std::make_shared<::ngraph::VariantWrapper>(kvp.second); + return_node->set_friendly_name(node->getName()); + + return return_node; + }; + + ngraph::NodeVector nodes; + nodes.reserve(graph.graphNodes.size()); + for (auto &node : graph.graphNodes) { // important: graph.graphNodes are in topological order + nodes.emplace_back(create_ngraph_node(node)); + node2layer[node] = nodes.back(); + } + + auto holder = results[0]; + for (auto &node : to_hold) { + holder->add_control_dependency(node); + } + + auto function = std::make_shared(results, params, graph._name); + InferenceEngine::CNNNetwork net(function); + return net; +} + } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h index e68c8a99be6..d954695baaa 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h @@ -11,9 +11,6 @@ namespace MKLDNNPlugin { -void dump_graph_as_dot(const MKLDNNGraph &graph, std::ostream &out); - -InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph); InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 781120774ba..2d17b2b35df 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -12,17 +12,18 @@ #include "nodes/mkldnn_reorder_node.h" #include "nodes/mkldnn_conv_node.h" #include "nodes/mkldnn_bin_conv_node.h" -#include "nodes/mkldnn_quantize_node.h" +#include "nodes/mkldnn_fake_quantize_node.h" #include "nodes/mkldnn_mvn_node.h" -#include +#include #include "nodes/mkldnn_interpolate_node.h" #include "nodes/mkldnn_input_node.h" +#include "nodes/common/cpu_convert.h" #include "mkldnn/ie_mkldnn.h" #include -#include #include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" // WA for xbyak.h #ifdef _WIN32 @@ -50,55 +51,49 @@ using namespace InferenceEngine; MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {} void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { - OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "MergeTwoEqualScaleShifts"); - MergeTwoEqualScaleShifts(graph); + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, 
"ApplyCommonGraphOptimizations", "FuseConvolutionAndBias"); + FuseConvolutionAndBias(graph); + graph.RemoveDroppedNodes(); + + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMultiplyAndAdd"); + FuseMultiplyAndAdd(graph); + graph.RemoveDroppedNodes(); + + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseDeconvolutionAndSimpleOperation"); + FuseDeconvolutionAndSimpleOperation(graph); graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBroadcastAndEltwise"); FuseBroadcastAndEltwise(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseClampAndQuantize"); - FuseClampAndQuantize(graph); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseClampAndFakeQuantize"); + FuseClampAndFakeQuantize(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseScaleShiftAndQuantize"); - FuseScaleShiftAndQuantize(graph); - graph.RemoveDroppedNodes(); - - MergeGroupConvolution(graph); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMulAddAndFakeQuantize"); + FuseMulAddAndFakeQuantize(graph); graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndZeroPoints"); FuseConvolutionAndZeroPoints(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); - FuseConvolutionAndDepthwise(graph); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperationThroughMaxPool"); + FuseConvolutionAndSimpleOperationThroughMaxPool(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndActivation"); - FuseConvolutionAndActivation(graph); - graph.RemoveDroppedNodes(); - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); - FuseConvolutionAndDepthwise(graph); - graph.RemoveDroppedNodes(); - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndQuantize"); - FuseConvolutionAndQuantize(graph); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperation"); + // TODO [NM]: While fusing simple operation into any node (except Eltwise) we need to check that other inputs are Constant nodes. 
+ FuseConvolutionAndSimpleOperation(graph); graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.SortTopologically(); graph.RemoveDroppedEdges(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDepthwise"); - FuseConvolutionAndDepthwise(graph); - graph.RemoveDroppedNodes(); - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePoolingAndQuantize"); - FusePoolingAndQuantize(graph); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePoolingAndFakeQuantize"); + FusePoolingAndFakeQuantize(graph); graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); @@ -109,18 +104,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { FuseConvolutionAndDWConvolution(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBinaryConvolutionAndQuantize"); - FuseBinaryConvolutionAndQuantize(graph); - graph.RemoveDroppedNodes(); - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBatchNormWithScale"); - FuseBatchNormWithScale(graph); - graph.RemoveDroppedNodes(); - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveIdentityOperator"); - RemoveIdentityOperator(graph); - graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionSumAndConvolutionSumActivation"); FuseConvolutionSumAndConvolutionSumActivation(graph); graph.RemoveDroppedNodes(); @@ -141,8 +124,8 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { FuseInterpolateAndSimpleOperation(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseNormalizeAndSimpleOperation"); - FuseNormalizeAndSimpleOperation(graph); + OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseNormalizeL2AndSimpleOperation"); + FuseNormalizeL2AndSimpleOperation(graph); graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseEltwiseAndSimple"); @@ -156,9 +139,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); - RemoveIOScaleShifts(graph); - graph.RemoveDroppedNodes(); - DropDoubleReorders(graph); graph.RemoveDroppedNodes(); @@ -169,27 +149,279 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap graph.RemoveDroppedNodes(); #endif - MergePermuteAndReorder(graph); + MergeTransposeAndReorder(graph); graph.RemoveDroppedNodes(); graph.RemoveDroppedEdges(); } +void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { + auto& graphNodes = graph.GetNodes(); + + auto isSutableParentNode = [](MKLDNNNodePtr node) { + return node->getType() == Convolution && + node->getChildEdges().size() == 1 && + node->getParentEdges().size() == 2 && + node->getFusedWith().empty(); + }; + + auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + if ((parentNode->isConstant() && !childNode->isConstant()) || childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || + childNode->getParentEdges().size() != 2) + return false; + + auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent(); + if (biasNode->getChildEdges().size() != 1) + return false; + + auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + auto biasDims = 
getNormalizedDimsBySize(biasNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(), + convOutDims.size()); + // TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasing) and per-channel cases. + // Most of the real models contain per-channel bias, so we need to reavaluate the need to support per-tensor variant. + if (convOutDims.size() != biasDims.size() || biasDims.size() < 2) + return false; + + if (biasDims[0] != 1 || biasDims[1] != convOutDims[1]) + return false; + + for (int i = 2; i < biasDims.size(); i++) { + if (biasDims[i] != 1) + return false; + } + + return true; + }; + + auto parent = graphNodes.begin(); + while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSutableParentNode(parentNode)) { + parent++; + continue; + } + + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!isSutableChildNode(parentNode, childNode)) { + parent++; + continue; + } + + auto childs = childNode->childEdges; + auto parents = childNode->parentEdges; + + for (size_t i = 0; i < parents.size(); i++) { + auto p_edge = parents[i].lock(); + if (!p_edge) continue; + auto parent = p_edge->getParent(); + if (!parent) continue; + + if (parent == parentNode) { + for (size_t j = 0; j < childs.size(); j++) { + if (!childs[j].lock()) + continue; + auto child = childs[j].lock()->getChild(); + if (!child) + continue; + + MKLDNNEdgePtr &remEdge = p_edge; + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + remEdge = childs[j].lock(); + int outNum = 0; + if (remEdge) { + outNum = remEdge->getOutputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + auto &graphEdges = graph.GetEdges(); + graphEdges.push_back(newEdge); + parent->addEdge(newEdge); + } + } else { + MKLDNNEdgePtr &remEdge = p_edge; + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + + auto parentEltwise = parentNode; + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); + auto &graphEdges = graph.GetEdges(); + graphEdges.push_back(newEdge); + parent->addEdge(newEdge); + + parent->outDims[inNum] = MKLDNNDims({parentEltwise->outDims[0][1]}); + parentEltwise->inDims.push_back(parent->outDims[0]); + } + } + + graph.DropNode(childNode); + parentNode->addOriginalLayer(childNode->getOriginalLayers()); + parentNode->addOriginalInputPrecision(childNode->getOriginalInputPrecisionAtPort(1)); + } +} + +void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph) { + auto& graphNodes = graph.GetNodes(); + + auto isSuitableParentNode = [](MKLDNNNodePtr node) { + return node->getType() == Deconvolution && node->getChildEdges().size() == 1 && node->getFusedWith().empty(); + }; + + auto parent = graphNodes.begin(); + while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSuitableParentNode(parentNode)) { + parent++; + continue; + } + + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + // at this moment deconvolution supports only depthwise as post op + if (!childNode->canBePerformedAsScaleShift(parentNode.get())) { + parent++; + continue; + } + + childNode->fuseInto(parentNode); + + auto parentEdges = childNode->parentEdges; + for (auto &parentEdge : parentEdges) { + auto p_edge = parentEdge.lock(); + if (p_edge->getParent()->getType() == Deconvolution) + continue; + 
+ removeEdge(graph, p_edge); + } + + graph.DropNode(childNode); + } +} + +void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { + auto& graphNodes = graph.GetNodes(); + + auto isSutableSecondInput = [](MKLDNNNodePtr node, MKLDNNDims dataDims) { + auto secondInputDims = node->outDims[0]; + if (secondInputDims.ndims() != dataDims.ndims() || secondInputDims.ndims() < 2) + return false; + + if (secondInputDims[0] != 1 || secondInputDims[1] != dataDims[1]) + return false; + + for (size_t i = 2; i < secondInputDims.ndims(); i++) { + if (secondInputDims[i] != 1) + return false; + } + + return true; + }; + + auto isSutableParentNode = [&](MKLDNNNodePtr node) { + if (node->getAlgorithm() != EltwiseMultiply || !node->getFusedWith().empty() || + node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) + return false; + + return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getDims()); + }; + + auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + if ((parentNode->isConstant() && !childNode->isConstant()) || childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || + childNode->getParentEdges().size() != 2) + return false; + + return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getDims()); + }; + + auto parent = graphNodes.begin(); + while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSutableParentNode(parentNode)) { + parent++; + continue; + } + + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!isSutableChildNode(parentNode, childNode)) { + parent++; + continue; + } + + auto childs = childNode->childEdges; + auto parents = childNode->parentEdges; + + for (size_t i = 0; i < parents.size(); i++) { + auto p_edge = parents[i].lock(); + if (!p_edge) continue; + auto parent = p_edge->getParent(); + if (!parent) continue; + + if (parent == parentNode) { + for (size_t j = 0; j < childs.size(); j++) { + if (!childs[j].lock()) + continue; + auto child = childs[j].lock()->getChild(); + if (!child) + continue; + + MKLDNNEdgePtr &remEdge = p_edge; + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + remEdge = childs[j].lock(); + int outNum = 0; + if (remEdge) { + outNum = remEdge->getOutputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + auto &graphEdges = graph.GetEdges(); + graphEdges.push_back(newEdge); + parent->addEdge(newEdge); + } + } else { + MKLDNNEdgePtr &remEdge = p_edge; + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + + auto parentEltwise = parentNode; + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); + auto &graphEdges = graph.GetEdges(); + graphEdges.push_back(newEdge); + parent->addEdge(newEdge); + + parentEltwise->inDims.push_back(parent->outDims[0]); + } + } + + parentNode->addOriginalInputPrecision(childNode->getOriginalInputPrecisionAtPort(1)); + parentNode->setAlgorithm(EltwiseMulAdd); + parentNode->addOriginalLayer(childNode->getOriginalLayers()); + graph.DropNode(childNode); + } +} + void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableConvNode = [](MKLDNNNodePtr node) { - if 
(node->getType() != Convolution) - return false; - - if (node->getParentEdges().size() < 2) - return false; - - auto* convLayer = dynamic_cast(node->getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot get convolution layer " << node->getName(); - - return true; + return node->getType() == Convolution; }; auto initializeInputZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0, MKLDNNNodePtr parent1) { @@ -201,43 +433,53 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { int OC = node->getChildEdgesAtPort(0)[0]->getDims()[1]; if (parent0->getType() == Eltwise) { + if (!parent0->getFusedWith().empty() || !parent1->getFusedWith().empty()) + return false; + // The plug-in doesn't support FP32 convolution with input/weights zero points. // In case weights are in FP32 (or we have zero points on weights which are not supported by INT8 convolution) we cannot use // INT8 implementation so we have to disable input zero points fusing as well. - auto weightsLayer = parent1->getCnnLayer(); - if (!weightsLayer || weightsLayer->type != "Const" || weightsLayer->outData[0]->getPrecision() != Precision::I8) { + if (parent1->getType() != Input || !parent1->isConstant() || parent1->getOriginalOutputPrecisionAtPort(0) != Precision::I8) { return false; } - auto* eltwiseNode = dynamic_cast(parent0.get()); - if (eltwiseNode->getOpType() != Subtract) + if (parent0->getAlgorithm() != Algorithm::EltwiseSubtract) return false; if (parent0->getParentEdges().size() != 2) return false; - if (parent0->getParentEdgesAtPort(1)[0]->getParent()->getCnnLayer()->type == "Const") { - auto arg0 = parent0->getParentEdgesAtPort(1)[0]->getParent(); - if (arg0->getCnnLayer()->outData[0]->getPrecision() != Precision::U8) + auto arg0 = parent0->getParentEdgesAtPort(1)[0]->getParent(); + if (arg0->getType() == Input && arg0->isConstant()) { + if (arg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8) return false; if (parent0->getParentEdgesAtPort(1)[0]->getDims().size() < 2) { return false; } - if (parent0->getParentEdgesAtPort(1)[0]->getDims()[1] != 1 && - parent0->getParentEdgesAtPort(1)[0]->getDims()[1] != IC) + auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getDims(); + if (zpDims[0] != 1 || zpDims[1] != IC) return false; + for (int i = 2; i < zpDims.ndims(); i++) { + if (zpDims[i] != 1) + return false; + } + auto arg1 = parent0->getParentEdgesAtPort(0)[0]->getParent(); - if (arg1->getCnnLayer()->outData[0]->getPrecision() != Precision::U8) + if (arg1->getOriginalOutputPrecisionAtPort(0) != Precision::U8) return false; - auto zeroPointsBlob = dynamic_cast*>(arg0->getCnnLayer()->blobs["custom"].get()); + auto zeroPointsConstant = dynamic_cast(arg0.get()); + if (zeroPointsConstant == nullptr) + IE_THROW() << "Cannot cast to Input node"; + + auto zeroPointsBlob = dynamic_cast*>(zeroPointsConstant->getConstBlob().get()); if (zeroPointsBlob == nullptr) IE_THROW() << "Cannot cast to TBlob internal zero points blob"; - auto zeroPointsData = zeroPointsBlob->buffer().as(); + auto zeroPointsData = zeroPointsBlob->cbuffer().as(); if (zeroPointsData == nullptr) IE_THROW() << "zeroPointsBlob has not allocated buffer"; @@ -258,91 +500,32 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { return true; }; -// auto initializeWeightsZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0) { -// auto* convNode = dynamic_cast(node.get()); -// if (convNode == nullptr) -// IE_THROW() << "Cannot get convolution node " << node->getName(); -// -// int OC = 
node->getChildEdgesAtPort(0)[0]->getDims()[1]; -// -// if (parent0->getType() == Eltwise) { -// auto* eltwiseNode = dynamic_cast(parent0.get()); -// if (eltwiseNode->getOpType() != Subtract) -// return false; -// -// if (parent0->getParentEdges().size() != 2) -// return false; -// -// if (parent0->getParentEdgesAtPort(1)[0]->getParent()->getCnnLayer()->type == "Const") { -// auto arg0 = parent0->getParentEdgesAtPort(1)[0]->getParent(); -// if (arg0->getCnnLayer()->outData[0]->getPrecision() != Precision::I8) -// return false; -// -// if (parent0->getParentEdgesAtPort(1)[0]->getDims()[0] != 1 && -// parent0->getParentEdgesAtPort(1)[0]->getDims()[0] != OC) -// return false; -// -// auto arg1 = parent0->getParentEdgesAtPort(0)[0]->getParent(); -// if (arg1->getCnnLayer()->outData[0]->getPrecision() != Precision::I8) -// return false; -// -// auto zeroPointsBlob = dynamic_cast*>(arg0->getCnnLayer()->blobs["custom"].get()); -// if (zeroPointsBlob == nullptr) -// IE_THROW() << "Cannot cast to TBlob internal zero points blob"; -// -// auto zeroPointsData = zeroPointsBlob->buffer().as(); -// if (zeroPointsData == nullptr) -// IE_THROW() << "zeroPointsBlob has not allocated buffer"; -// -// for (int j = 0; j < parent0->getParentEdgesAtPort(1)[0]->getDims()[0]; j++) { -// convNode->weightsZeroPoints.push_back(static_cast(zeroPointsData[j])); -// } -// } else { -// return false; -// } -// } else { -// return false; -// } -// -// return true; -// }; - auto initializeOutputCompensation = [](MKLDNNNodePtr node) { auto* convNode = dynamic_cast(node.get()); if (convNode == nullptr) IE_THROW() << "Cannot get convolution node " << node->getName(); - auto * convLayer = dynamic_cast(convNode->getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot get eltwise layer " << node->getName(); - - for (int i = 0; i < convLayer->insData.size(); i++) - if (convLayer->insData[i].lock() == nullptr) - IE_THROW() << "Node '"<< node->getName() << "' has invalid input data with index " << i; - if (convNode->inputZeroPoints.empty()) return; - auto weightsLayer = getCreatorLayer(convLayer->insData[1].lock()).lock(); - if (weightsLayer->type != "Const") { - weightsLayer = getCreatorLayer(weightsLayer->insData[0].lock()).lock(); - } + auto weightsConstant = dynamic_cast(convNode->getParentEdgesAtPort(1)[0]->getParent().get()); + if (!weightsConstant || !weightsConstant->isConstant()) + return; - - auto weightsBlob = dynamic_cast*>(weightsLayer->blobs["custom"].get()); + auto weightsBlob = dynamic_cast*>(weightsConstant->getConstBlob().get()); if (weightsBlob == nullptr) IE_THROW() << "Cannot cast to TBlob internal weights blob"; - auto weightsPtr = weightsBlob->buffer().as(); + auto weightsPtr = weightsBlob->cbuffer().as(); if (weightsPtr == nullptr) IE_THROW() << "weightsBlob has not allocated buffer"; - ptrdiff_t G = convLayer->_group; - ptrdiff_t OC = weightsLayer->outData[0]->getDims()[0] / G; - ptrdiff_t IC = weightsLayer->outData[0]->getDims()[1]; - ptrdiff_t KD = weightsLayer->outData[0]->getDims().size() == 5 ? weightsLayer->outData[0]->getDims()[2] : 1; - ptrdiff_t KH = weightsLayer->outData[0]->getDims()[weightsLayer->outData[0]->getDims().size() - 2]; - ptrdiff_t KW = weightsLayer->outData[0]->getDims()[weightsLayer->outData[0]->getDims().size() - 1]; + ptrdiff_t G = convNode->getGroupNum(); + ptrdiff_t OC = weightsConstant->outDims[0][0] / G; + ptrdiff_t IC = weightsConstant->outDims[0][1]; + ptrdiff_t KD = weightsConstant->outDims[0].ndims() == 5 ? 
weightsConstant->outDims[0][2] : 1; + ptrdiff_t KH = weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 2]; + ptrdiff_t KW = weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 1]; for (size_t g = 0; g < G; g++) { for (size_t oc = 0; oc < OC; oc++) { @@ -387,366 +570,22 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { graph.DropNode(dataEltwise); } -// [TODO] Weights zero point is not supported on oneDNN side for the moment -// auto weightsEltwise = conv->getParentEdgesAtPort(1)[0]->getParent(); -// if (initializeWeightsZeroPoints(conv, weightsEltwise)) { -// auto p_edge = weightsEltwise->getParentEdgesAtPort(1)[0]; -// removeEdge(graph, p_edge); -// -// graph.DropNode(weightsEltwise); -// } - initializeOutputCompensation(conv); } } -void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) { - for (auto node : graph.GetNodes()) { - // Split with at least 2 Convolutions - if (!IsOneOf(node->getType(), {Split}) || node->getChildEdges().size() < 2 || - !IsOneOf(node->getChildEdgeAt(0)->getChild()->getType(), {Convolution})) { - continue; - } - bool canBeMerged = true; - - auto& split = node; - - auto convInEdge = split->getChildEdgeAt(0); - auto conv = convInEdge->getChild(); - auto convOutEdge = conv->getChildEdgeAt(0); - - auto convType = conv->getType(); - auto convInDims = convInEdge->getDims(); - auto convOutDims = convOutEdge->getDims(); - - // Convolutions of same the type with Concat as a child - for (size_t i = 1; i < split->getChildEdges().size(); i++) { - auto childEdge = split->getChildEdgeAt(i); - auto child = childEdge->getChild(); - Type type = child->getType(); - - if (convType != type || child->getChildEdgeAt(0)->getChild()->getType() != Concatenation || - convOutDims != child->getChildEdgeAt(0)->getDims() || child->getChildEdges().size() != 1 || - convInDims != childEdge->getDims()) { - canBeMerged = false; - break; - } - } - - if (!canBeMerged) continue; - - // TODO: Rewrite topology optimizer at all. 
it should be clean and understandable - auto concat = conv->getChildEdgeAt(0)->getChild(); - // Merge and remove Convolution - while (split->getChildEdges().size() > 1) { - auto peerInEdge = split->getChildEdgeAt(1); - auto peer = peerInEdge->getChild(); - conv->mergeWith(peer); - convInDims[1] += (peerInEdge->getDims())[1]; - convOutDims[1] += (peer->getChildEdgeAt(0)->getDims())[1]; - peer->remove(); - } - conv->inDims[0] = convInDims; - conv->outDims[0] = convOutDims; - - conv->fuseWith(split); - conv->fuseWith(concat); - - graph.DropNode(split); - graph.DropNode(concat); - } -} - -// WA: We need it until LP transformations will not optimize this pattern inside -void MKLDNNGraphOptimizer::MergeTwoEqualScaleShifts(MKLDNNGraph& graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableScaleShiftNode = [](MKLDNNNodePtr node) { - if (node->getType() != Eltwise) - return false; - - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to Eltwise node"; - - if (eltwiseNode->getChildEdges().size() != 1) - return false; - - if (eltwiseNode->getOpType() != MulAdd) - return false; - - return true; - }; - - auto isEqualScaleShiftNodes = [](MKLDNNNodePtr node1, MKLDNNNodePtr node2) { - if (node1->getParentEdgeAt(0) != node2->getParentEdgeAt(0)) - return false; - - auto *eltwiseNode1 = dynamic_cast(node1.get()); - auto *eltwiseNode2 = dynamic_cast(node2.get()); - - auto eltwiseLayer1 = eltwiseNode1->getCnnLayer(); - auto eltwiseLayer2 = eltwiseNode2->getCnnLayer(); - - Blob::Ptr scalesBlob1 = eltwiseLayer1->blobs["weights"]; - Blob::Ptr shiftsBlob1 = eltwiseLayer1->blobs["biases"]; - Blob::Ptr scalesBlob2 = eltwiseLayer2->blobs["weights"]; - Blob::Ptr shiftsBlob2 = eltwiseLayer2->blobs["biases"]; - if (scalesBlob1 == nullptr || shiftsBlob1 == nullptr || scalesBlob2 == nullptr || shiftsBlob2 == nullptr) - return false; - - if (scalesBlob1->size() != shiftsBlob1->size() || scalesBlob2->size() != shiftsBlob2->size() - || scalesBlob1->size() != scalesBlob2->size()) return false; - - const float *scalesBufferPtr1 = scalesBlob1->buffer().as(); - const float *shiftsBufferPtr1 = shiftsBlob1->buffer().as(); - const float *scalesBufferPtr2 = scalesBlob2->buffer().as(); - const float *shiftsBufferPtr2 = shiftsBlob2->buffer().as(); - - for (int i = 0; i < scalesBlob1->size(); i++) - if (scalesBufferPtr1[i] != scalesBufferPtr2[i] || shiftsBufferPtr1[i] != shiftsBufferPtr2[i]) - return false; - - return true; - }; - - auto MergeScaleShiftNodes = [&](MKLDNNNodePtr childNode1, MKLDNNNodePtr childNode2) { - auto parentNode = childNode2->getParentEdgeAt(0)->getParent(); - auto ccNode2 = childNode2->getChildEdgeAt(0)->getChild(); - - auto parentEdges = childNode2->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent() == parentNode) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(childNode2); - - MKLDNNEdgePtr remEdge; - for (auto edge : parentNode->getChildEdges()) { - if (edge.lock()->getChild() == ccNode2) { - remEdge = edge.lock(); - break; - } - } - if (remEdge == nullptr) - IE_THROW() << "Edge was not found"; - remEdge->drop(); - graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), remEdge), graph.GetEdges().end()); - - if (childNode1->getChildEdgeAt(0)->getChild() != ccNode2) { - auto iIndex = childNode1->getChildEdgeAt(0)->getInputNum(); - auto oIndex = remEdge->getOutputNum(); - MKLDNNEdgePtr newEdge(new 
MKLDNNEdge(childNode1, ccNode2, iIndex, oIndex)); - childNode1->addEdge(newEdge); - graph.GetEdges().push_back(newEdge); - } - }; - - for (int i = 0; i < graphNodes.size(); i++) { - auto parentNode = graphNodes[i]; - if (parentNode->getChildEdges().size() != 2) continue; - - auto childNode1 = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableScaleShiftNode(childNode1)) continue; - - auto childNode2 = parentNode->getChildEdgeAt(1)->getChild(); - if (!isSutableScaleShiftNode(childNode2)) continue; - - if (!isEqualScaleShiftNodes(childNode1, childNode2)) continue; - - MergeScaleShiftNodes(childNode1, childNode2); - } -} - -void MKLDNNGraphOptimizer::FuseBatchNormWithScale(MKLDNNGraph &graph) { - auto &graphNodes = graph.GetNodes(); - - for (int i = 0; i < graphNodes.size(); i++) { - const auto& bn = graphNodes[i]; - if (bn->getType() == BatchNormalization) { - const auto& outputNodes = graph.GetOutputNodes(); - const std::string node_name = bn->getName(); - // Check that the node is not output node - if (std::find_if(outputNodes.begin(), outputNodes.end(), - [&node_name](const MKLDNNNodePtr& x) { - return x->getName() == node_name;}) == outputNodes.end()) { - if (bn->getChildEdges().size() == 1) { - auto child = bn->getChildEdgeAt(0)->getChild(); - if (child->type == Eltwise && child->getCnnLayer()->type == "ScaleShift") { - bn->fuseWith(child); - - auto parentEdges = child->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == BatchNormalization) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(child); - } - } - } - } - } -} - -void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) { - auto* binConv = dynamic_cast(conv.get()); - if (binConv) { - if (!binConv->canFuse(activation)) - return false; - } - - if (!activation->getCnnLayer()) - return false; - - auto* eltwiseNode = dynamic_cast(activation.get()); - - return eltwiseNode && - (eltwiseNode->getOpType() == Relu || - (conv->getCnnLayer()->precision == Precision::FP32 && - IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid, - Round, SoftRelu}))); - }; - - for (int i = 0; i < graphNodes.size(); i++) { - if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) { - auto conv = graphNodes[i]; - - auto fuse = [&] (MKLDNNNodePtr relu) { - conv->fuseWith(relu); - }; - - if (conv->getChildEdges().size() == 1) { - auto ch1 = conv->getChildEdgeAt(0)->getChild(); - - if (isFusingSupported(conv, ch1)) { - fuse(ch1); - - if (ch1->getChildEdges().size() == 1) { - auto ch2 = ch1->getChildEdgeAt(0)->getChild(); - - if (isFusingSupported(conv, ch2)) { - fuse(ch2); - graph.DropNode(ch2); - } - } - graph.DropNode(ch1); - } else { - if (ch1->type == Pooling) { - auto pool = ch1; - - auto* pLayer = dynamic_cast(pool->getCnnLayer().get()); - if (pLayer == nullptr) - IE_THROW() << "Cannot get pooling layer " << pool->getName(); - bool is_max_pool = pLayer->_type == PoolingLayer::PoolType::MAX; - - if (is_max_pool && pool->getChildEdges().size() == 1) { - auto ch2 = pool->getChildEdgeAt(0)->getChild(); - if (isFusingSupported(conv, ch2)) { - fuse(ch2); - graph.DropNode(ch2); - } - } - } - } - } - } - } -} - static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { - return childNode->getType() == Quantize && + 
return childNode->getType() == FakeQuantize && one_of(Precision::BF16, - parentNode->getCnnLayer()->precision, - childNode->getCnnLayer()->precision, - parentNode->getCnnLayer()->outData[0].get()->getPrecision(), - childNode->getCnnLayer()->outData[0].get()->getPrecision()); + parentNode->getOriginalOutputPrecisionAtPort(0), + childNode->getOriginalOutputPrecisionAtPort(0)); } void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && - node->getChildEdges().size() == 1; - }; - - auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { - if (!childNode->getCnnLayer()) - return false; - - if (childNode->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(childNode.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << childNode->getName(); - - if (parentNode->getParentEdgesAtPort(0)[0]->getDims().ndims() != 3) { - return !quantizeNode->isBinarization(); - } else { - return (quantizeNode->isInputLowBroadcast() && quantizeNode->isInputHighBroadcast() && - quantizeNode->isOutputLowBroadcast() && quantizeNode->isOutputHighBroadcast() && - !quantizeNode->isBinarization()); - } - } else if (childNode->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(childNode.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get Eltwise node " << childNode->getName(); - - if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, - Hsigmoid, Round, SoftRelu})) { - return true; - } else if (IsOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu})) { - if (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() != 2) - return false; - - if (parentNode->getParentEdgesAtPort(0)[0]->getDims().ndims() != 3) { - return true; - } else { - const auto &eltwiseLayer = eltwiseNode->getCnnLayer(); - if (eltwiseLayer == nullptr) - IE_THROW() << "Cannot get scale shift layer " << eltwiseNode->getName(); - - if (eltwiseNode->getOpType() != MulAdd) - return false; - - Blob::Ptr scalesBlob = eltwiseLayer->blobs["weights"]; - if (scalesBlob == nullptr) - return false; - - Blob::Ptr shiftsBlob = eltwiseLayer->blobs["biases"]; - if (shiftsBlob == nullptr) - return false; - - const float *scalesBufferPtr = scalesBlob->buffer().as(); - const float *shiftsBufferPtr = shiftsBlob->buffer().as(); - - if (scalesBlob->size() != shiftsBlob->size()) - return false; - - for (int i = 1; i < scalesBlob->size(); i++) - if (scalesBufferPtr[0] != scalesBufferPtr[i]) - return false; - - for (int i = 1; i < shiftsBlob->size(); i++) - if (shiftsBufferPtr[0] != shiftsBufferPtr[i]) - return false; - - return true; - } - } - } - - return false; + return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getDims().ndims() != 3; }; auto parent = graphNodes.begin(); @@ -758,7 +597,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(parentNode, childNode)) { + if (!parentNode->canFuse(childNode)) { parent++; continue; } @@ -769,9 +608,9 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { + if 
(childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); @@ -786,172 +625,86 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra } } -void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutableConv = (node->getType() == Convolution) && - node->getCnnLayer()->precision == Precision::FP32; - bool isSutableBinConv = node->getType() == BinaryConvolution; - return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1; - }; - - auto isSutableChildNode = [](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { - if (childNode->getType() != Eltwise) - return false; - - if (!childNode->getCnnLayer()) - return false; - - auto* binConv = dynamic_cast(parentNode.get()); - if (binConv) { - if (!binConv->canFuse(childNode)) - return false; - } - - auto* eltwiseNode = dynamic_cast(childNode.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << childNode->getName(); - return ((eltwiseNode->getOpType() == MulAdd && childNode->getCnnLayer()->blobs.size() == 2) || - (eltwiseNode->getOpType() == Prelu)); - }; - - for (int i = 0; i < graphNodes.size(); i++) { - auto conv = graphNodes[i]; - if (!isSutableParentNode(conv)) continue; - - auto depthwise0 = conv->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(conv, depthwise0)) continue; - - conv->fuseWith(depthwise0); - - if (depthwise0->getChildEdges().size() == 1) { - auto depthwise1 = depthwise0->getChildEdgeAt(0)->getChild(); - - if (isSutableChildNode(conv, depthwise1)) { - conv->fuseWith(depthwise1); - - auto parents = depthwise1->parentEdges; - for (size_t j = 0; j < parents.size(); j++) { - auto p_edge = parents[j].lock(); - if (p_edge->getParent()->getType() == Eltwise) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(depthwise1); - } - } - - auto parents = depthwise0->parentEdges; - for (size_t j = 0; j < parents.size(); j++) { - auto p_edge = parents[j].lock(); - if (p_edge->getParent()->getType() == Convolution || p_edge->getParent()->getType() == BinaryConvolution) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(depthwise0); - } -} - void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isConvolutionNode = [](MKLDNNNodePtr node) { + auto isConvolutionNode = [](const MKLDNNNodePtr &node) { return node->getType() == Convolution; }; - auto is1x1Convolution = [](ConvolutionLayer* layer) { - return layer->_kernel[X_AXIS] == 1 && layer->_kernel[Y_AXIS] == 1; + auto is1x1Convolution = [](const std::shared_ptr &conv) { + const auto weightRank = conv->getWeightDims().size(); + return conv->getWeightDims()[weightRank - 1] == 1 && conv->getWeightDims()[weightRank - 2] == 1; }; auto isSutableParentConvolution = [&](MKLDNNNodePtr node) { - auto *layer = dynamic_cast(node->getCnnLayer().get()); - if (layer == nullptr) - IE_THROW() << "Cannot get convolution layer " << node->getName(); + const auto conv = std::dynamic_pointer_cast(node); + if (conv == nullptr) + IE_THROW() << "Cannot cast to convolution node " << node->getName(); - auto* parentConvolutionNode = dynamic_cast(node.get()); - if (parentConvolutionNode == nullptr) - IE_THROW() << "Cannot get convolution node " << node->getName(); - - if 
(!parentConvolutionNode->weightsZeroPoints.empty()) + if (!conv->weightsZeroPoints.empty()) return false; - // TODO [oneDNN]: is it still valide constrain on conv to fuse in? - bool isSupportedParams = layer->_group == 1 && - is1x1Convolution(layer) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions - everyone_is(1, layer->_stride[X_AXIS], layer->_stride[Y_AXIS]) && - everyone_is(Precision::FP32, layer->insData[0].lock()->getPrecision(), layer->outData[0].get()->getPrecision()) && + const auto &strides = conv->getStride(); + bool isSupportedParams = conv->getGroupNum() == 1 && + is1x1Convolution(conv) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions + everyone_is(1, strides[strides.size() - 1], strides[strides.size() - 2]) && + everyone_is(Precision::FP32, conv->getOriginalInputPrecisionAtPort(0), conv->getOriginalOutputPrecisionAtPort(0)) && node->getChildEdgeAt(0)->getDims().ndims() == 4; if (!isSupportedParams) return false; return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild()); }; - auto isSutableChildConvolution = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { - auto* childLayer = dynamic_cast(childNode->getCnnLayer().get()); - if (childLayer == nullptr) - IE_THROW() << "Cannot get convolution layer " << childNode->getName(); + auto isSutableChildConvolution = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { + const auto convChild = std::dynamic_pointer_cast(childNode); + if (convChild == nullptr) + IE_THROW() << "Cannot cast to convolution node " << childNode->getName(); - auto* parentLayer = dynamic_cast(parentNode->getCnnLayer().get()); - if (parentLayer == nullptr) - IE_THROW() << "Cannot get convolution layer " << parentNode->getName(); + const auto convParent = std::dynamic_pointer_cast(parentNode); + if (convParent == nullptr) + IE_THROW() << "Cannot cast to convolution node " << parentNode->getName(); - if (!everyone_is(Precision::FP32, parentLayer->outData[0].get()->getPrecision(), childLayer->insData[0].lock()->getPrecision(), - childLayer->outData[0].get()->getPrecision())) - return false; - - if (!everyone_is(Precision::FP32, parentLayer->precision, childLayer->precision)) + if (!everyone_is(Precision::FP32, convParent->getOriginalOutputPrecisionAtPort(0), convChild->getOriginalInputPrecisionAtPort(0), + convChild->getOriginalOutputPrecisionAtPort(0))) return false; auto parentOutputPrecision = !parentNode->fusedWith.empty() - ? parentNode->fusedWith[parentNode->fusedWith.size() - 1]->getCnnLayer()->outData[0].get()->getPrecision() - : parentNode->getCnnLayer()->outData[0].get()->getPrecision(); + ? parentNode->fusedWith[parentNode->fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0) + : parentNode->getOriginalOutputPrecisionAtPort(0); auto childOutputPrecision = !childNode->fusedWith.empty() - ? childNode->fusedWith[childNode->fusedWith.size() - 1]->getCnnLayer()->outData[0].get()->getPrecision() - : childNode->getCnnLayer()->outData[0].get()->getPrecision(); + ? 
childNode->fusedWith[childNode->fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0) + : childNode->getOriginalOutputPrecisionAtPort(0); if (!everyone_is(Precision::FP32, parentOutputPrecision, childOutputPrecision)) return false; - auto* childConvolutionNode = dynamic_cast(childNode.get()); - if (childConvolutionNode == nullptr) - IE_THROW() << "Cannot get convolution node " << childNode->getName(); - - if (!childConvolutionNode->inputZeroPoints.empty() || !childConvolutionNode->weightsZeroPoints.empty()) + if (!convChild->inputZeroPoints.empty() || !convChild->weightsZeroPoints.empty()) return false; - bool withBias = (childLayer->_biases != nullptr && childLayer->_biases->size() != 0) || - childConvolutionNode->getBaseIntputsNumber() == 3; + bool withBias = convChild->getOriginalInputPrecisions().size() == 3; - auto allPads = getPaddings(*childLayer); - - bool isSupportedParams = childLayer->_out_depth == childLayer->_group && - childLayer->_out_depth != 1 && - everyone_is(3, childLayer->_kernel[X_AXIS], childLayer->_kernel[Y_AXIS]) && - everyone_is(1, allPads.begin[X_AXIS], allPads.begin[Y_AXIS]) && - everyone_is(1, allPads.end[X_AXIS], allPads.end[Y_AXIS]) && - everyone_is(1, childLayer->_dilation[X_AXIS], childLayer->_dilation[Y_AXIS]) && - childLayer->_stride[X_AXIS] == childLayer->_stride[Y_AXIS] && + const auto weightRank = convChild->getWeightDims().size(); + const auto stridesSize = convChild->getStride().size(); + bool isSupportedParams = convChild->outDims[0][1] == convChild->getGroupNum() && + convChild->outDims[0][1] != 1 && + everyone_is(3, convChild->getWeightDims()[weightRank - 1], convChild->getWeightDims()[weightRank - 2]) && + everyone_is(1, convChild->getPaddingL()[stridesSize - 1], convChild->getPaddingL()[stridesSize - 2]) && + everyone_is(1, convChild->getPaddingR()[stridesSize - 1], convChild->getPaddingR()[stridesSize - 2]) && + everyone_is(1, convChild->getDilation()[stridesSize - 1] + 1, convChild->getDilation()[stridesSize - 2] + 1) && + convChild->getStride()[stridesSize - 1] == convChild->getStride()[stridesSize - 2] && withBias && - one_of(childLayer->_stride[X_AXIS], 1, 2) && + one_of(convChild->getStride()[stridesSize - 1], 1, 2) && childNode->getChildEdgeAt(0)->getDims().ndims() == 4; return isSupportedParams; }; - auto isFusingWorthwhile = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { - auto layer = std::dynamic_pointer_cast(childNode->getCnnLayer()); - if (layer == nullptr) - IE_THROW() << "Cannot get convolution layer " << childNode->getName(); - + auto isFusingWorthwhile = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { auto inDims = childNode->inDims[0]; auto outDims = childNode->outDims[0]; - int elemSize = layer->precision.size(); + int elemSize = childNode->getOriginalOutputPrecisionAtPort(0).size(); int L3_cache_size = utils::get_cache_size(3, false); int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize; @@ -978,104 +731,24 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue; - parentConvNode->fuseWith(childConvNode); + parentConvNode->addFusedNode(childConvNode); - for (auto node : childConvNode->getFusedWith()) - parentConvNode->fuseWith(node); + for (auto node : childConvNode->getFusedWith()) { + parentConvNode->addFusedNode(node); + } childConvNode->clearFusedWith(); graph.DropDWConvNode(childConvNode); } } -void MKLDNNGraphOptimizer::FuseConvolutionAndQuantize(MKLDNNGraph 
&graph) { +// TODO [NM]: unite with FuseConvolutionAndSimpleOperation +void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutableBinConv = node->getType() == Convolution; - - if (isSutableBinConv) { - auto *convLayer = dynamic_cast(node->getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot get convolution layer " << node->getName(); - - return isSutableBinConv && node->getChildEdges().size() == 1; - } else { - return false; - } - }; - - auto isSutableChildNode = [](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() != Quantize) - return false; - - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - - return !quantizeNode->isBinarization(); - }; - - for (int i = 0; i < graphNodes.size(); i++) { - auto parent = graphNodes[i]; - if (!isSutableParentNode(parent)) continue; - - auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(child)) continue; - - // BF16 Quantize Layer Fusing Disabling - if (BF16QuantizeNodeFusing(parent, child)) - continue; - - parent->fuseWith(child); - - auto parents = child->parentEdges; - for (size_t j = 0; j < parents.size(); j++) { - auto p_edge = parents[j].lock(); - if (p_edge->getParent()->getType() == Convolution) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(child); - } -} - -void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == Convolution && - node->getChildEdges().size() == 1 && - node->getCnnLayer()->precision == Precision::FP32; - }; - - auto isSutableChildNode = [&](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - - return !quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << node->getName(); - - return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) || - (eltwiseNode->getOpType() == Prelu) || - IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, - Hsigmoid, Round, SoftRelu})); - } - - return false; + return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1 && + node->getOriginalOutputPrecisionAtPort(0) == Precision::FP32; }; auto parent = graphNodes.begin(); @@ -1087,7 +760,55 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(childNode)) { + if (childNode->getAlgorithm() != PoolingMax || childNode->getChildEdges().size() != 1) { + parent++; + continue; + } + + auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild(); + if (parentNode->getType() == BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { + parent++; + continue; + } + + if (!one_of(fuseCandidate->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, + 
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, + EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu)) { + parent++; + continue; + } + parentNode->addFusedNode(fuseCandidate); + parentNode->addOriginalLayer(fuseCandidate->getOriginalLayers()); + auto parentEdges = fuseCandidate->parentEdges; + for (auto &parentEdge : parentEdges) { + auto p_edge = parentEdge.lock(); + if (p_edge->getParent() == childNode) + continue; + + removeEdge(graph, p_edge); + } + graph.DropNode(fuseCandidate); + } +} + +void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) { + auto& graphNodes = graph.GetNodes(); + + auto isSutableParentNode = [](MKLDNNNodePtr node) { + return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1; + }; + + auto parent = graphNodes.begin(); + while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSutableParentNode(parentNode)) { + parent++; + continue; + } + const auto parentNodeType = parentNode->getType(); + + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!parentNode->canFuse(childNode)) { parent++; continue; } @@ -1098,13 +819,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { + if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Convolution) + if (p_edge->getParent()->getType() == parentNodeType) continue; removeEdge(graph, p_edge); @@ -1115,86 +836,20 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) } } -void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) { +void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutableBinConv = node->getType() == BinaryConvolution; - return isSutableBinConv && node->getChildEdges().size() == 1; - }; - - auto isSutableChildNode = [](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { - if (childNode->getType() != Quantize) - return false; - - auto* binConv = dynamic_cast(parentNode.get()); - if (!binConv) { - return false; - } - - return binConv->canFuse(childNode); - }; - - for (int i = 0; i < graphNodes.size(); i++) { - auto parent = graphNodes[i]; - if (!isSutableParentNode(parent)) continue; - - auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(parent, child)) continue; - - // BF16 Quantize Layer Fusing Disabling - if (BF16QuantizeNodeFusing(parent, child)) - continue; - - parent->fuseWith(child); - - auto parents = child->parentEdges; - for (size_t i = 0; i < parents.size(); i++) { - auto p_edge = parents[i].lock(); - if (p_edge->getParent()->getType() == BinaryConvolution) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(child); - } -} - -void MKLDNNGraphOptimizer::FusePoolingAndQuantize(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutablePooling = node->getType() == Pooling; - - if (isSutablePooling) { - auto *poolingLayer = dynamic_cast(node->getCnnLayer().get()); - if (poolingLayer == 
nullptr) - IE_THROW() << "Cannot get Pooling layer " << node->getName(); - - // Optimized FP32 Pooling doesn't support fusing with FQ - auto inputPrecision = poolingLayer->insData[0].lock()->getPrecision(); - if (inputPrecision != Precision::U8 && inputPrecision != Precision::I8) + if (node->getType() == Pooling) { + if (!one_of(node->getOriginalInputPrecisionAtPort(0), Precision::U8, Precision::I8)) return false; - - return node->getChildEdges().size() == 1 && poolingLayer->_type == PoolingLayer::AVG; - } else { - return false; + return node->getChildEdges().size() == 1 && node->getAlgorithm() == Algorithm::PoolingAvg; } + return false; }; auto isSutableChildNode = [](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() != Quantize) - return false; - - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - - return !quantizeNode->isBinarization(); + return node->getType() == FakeQuantize && node->getAlgorithm() != Algorithm::FQBinarization; }; for (int i = 0; i < graphNodes.size(); i++) { @@ -1204,7 +859,7 @@ void MKLDNNGraphOptimizer::FusePoolingAndQuantize(MKLDNNGraph &graph) { auto child = parent->getChildEdgeAt(0)->getChild(); if (!isSutableChildNode(child)) continue; - parent->fuseWith(child); + child->fuseInto(parent); auto parents = child->parentEdges; for (size_t i = 0; i < parents.size(); i++) { @@ -1288,24 +943,17 @@ static bool is_data_dependency(const std::shared_ptr &parent, void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph) { std::vector &graphNodes = graph.GetNodes(); - auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) { - if (!activation->getCnnLayer()) - return false; - - auto* eltwiseNode = dynamic_cast(activation.get()); - - return eltwiseNode && - (eltwiseNode->getOpType() == Relu || - (conv->getCnnLayer()->precision == Precision::FP32 && - IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid, - Round, SoftRelu}))); + auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr child) { + return child->getType() == Eltwise && + one_of(child->getAlgorithm(), EltwiseRelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseSwish, EltwiseHswish, + EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero, EltwiseSoftRelu); }; for (auto &graphNode : graphNodes) { if (graphNode->getType() != Eltwise) continue; - if (!std::dynamic_pointer_cast(graphNode)->isSum()) continue; + if (graphNode->getAlgorithm() != EltwiseAdd) continue; if (std::dynamic_pointer_cast(graphNode)->isWithBroadcast()) continue; // TODO: Enlarge to several inputs @@ -1319,14 +967,33 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG bool isSutableParent1 = parent1->getType() == Convolution || parent1->getType() == BinaryConvolution; bool isSutableParent2 = parent2->getType() == Convolution || parent2->getType() == BinaryConvolution; + auto canFuseSum = [](MKLDNNBinaryConvolutionNode *binConv, MKLDNNNodePtr fuseCandidate) { + if (binConv->getImplType() == impl_desc_type::ref) + return false; + + if (binConv->isFusedWith(FakeQuantize)) + return false; + + if (fuseCandidate->getAlgorithm() == EltwiseAdd) { + for (auto& fusedNode : binConv->fusedWith) { + const auto eltwise = std::dynamic_pointer_cast(fusedNode); + if (eltwise && eltwise->isSpecialConvolutionAddFusing()) { + return false; + } + } + return true; + } 
+ return false; + }; + auto* binConvNode1 = dynamic_cast(parent1.get()); if (binConvNode1) { - isSutableParent1 = isSutableParent1 && binConvNode1->canFuse(graphNode); + isSutableParent1 = isSutableParent1 && canFuseSum(binConvNode1, graphNode); } auto* binConvNode2 = dynamic_cast(parent2.get()); if (binConvNode2) { - isSutableParent2 = isSutableParent2 && binConvNode2->canFuse(graphNode); + isSutableParent2 = isSutableParent2 && canFuseSum(binConvNode2, graphNode); } auto* convNode1 = dynamic_cast(parent1.get()); @@ -1358,6 +1025,10 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG if (peerNode->isConstant()) continue; auto sum = graphNode; + + if (mergedConv->isConstant() && !sum->isConstant()) + continue; + auto lastNode = sum; bool fuse_allowed = mergedConv->getChildEdges().size() == 1; @@ -1380,10 +1051,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) { auto relu_shared = graphNode->getChildEdgeAt(0)->getChild(); lastNode = relu_shared; - mergedConv->fuseWith(sum); + if (mergedConv->isConstant() && !lastNode->isConstant()) + continue; + sum->fuseInto(mergedConv); } - mergedConv->fuseWith(lastNode); + lastNode->fuseInto(mergedConv); if (mergedConv->fusedWith.size() > 0 && (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) { @@ -1448,40 +1121,16 @@ void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { bool isSutableMVN = (node->getType() == MVN); if (isSutableMVN) { - auto *mvnLayer = dynamic_cast(node->getCnnLayer().get()); - if (mvnLayer == nullptr) - IE_THROW() << "Cannot get MVN layer " << node->getName(); + auto mvnNode = std::dynamic_pointer_cast(node); + if (mvnNode == nullptr) + IE_THROW() << "CPU node with name '" << node->getName() << "' is not a MVN node."; - return node->getChildEdges().size() == 1 && mvnLayer->across_channels == 0 && mvnLayer->normalize == 1; + return mvnNode->getChildEdges().size() == 1 && !mvnNode->getAcrossChannels() && mvnNode->getNormalizeVariance(); } else { return false; } }; - auto isSutableChildNode = [&](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - return !quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << node->getName(); - - return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Tanh, Logistic, Square, Abs, Sqrt, - Linear, BoundedRelu, SoftRelu, Relu6, Exp, Clamp, Swish, - Hswish, Mish, Hsigmoid, Round, Erf}) || - ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) || - (eltwiseNode->getOpType() == Prelu)); - } - - return false; - }; - auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { auto parentNode = *parent; @@ -1491,14 +1140,14 @@ void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(childNode)) { + if (!parentNode->canFuse(childNode)) { parent++; continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { 
+ if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); @@ -1517,12 +1166,7 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) auto& graphNodes = graph.GetNodes(); auto isSuitableParentNode = [](MKLDNNNodePtr node) { - bool isSuitable = (node->getType() == Interpolate); - if (isSuitable) { - return node->getChildEdges().size() == 1; - } else { - return false; - } + return node->getType() == Interpolate && node->getChildEdges().size() == 1; }; auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { @@ -1553,9 +1197,9 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { + if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); @@ -1570,39 +1214,11 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) } } -void MKLDNNGraphOptimizer::FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph) { +void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutableNormalize = node->getType() == Normalize; - - if (isSutableNormalize) { - return node->getChildEdges().size() == 1; - } else { - return false; - } - }; - - auto isSutableChildNode = [&](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - return !quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto *eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get Eltwise node " << node->getName(); - return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish, - Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt, SoftRelu}) || - ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) || - (eltwiseNode->getOpType() == Prelu)); - } - - return false; + return node->getType() == NormalizeL2 && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); @@ -1614,18 +1230,18 @@ void MKLDNNGraphOptimizer::FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph) { } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(childNode)) { + if (!parentNode->canFuse(childNode)) { parent++; continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { + if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Normalize) + if (p_edge->getParent()->getType() == NormalizeL2) continue; removeEdge(graph, p_edge); @@ -1644,6 +1260,8 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { }; auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, 
MKLDNNNodePtr childNode) { + if (parentNode->isConstant() && !childNode->isConstant()) + return false; for (auto &childParentEdge : childNode->getParentEdges()) { // WA to prevent unsupported reorder exception issue in some cases if (childParentEdge.lock()->getParent()->getType() == Split) { @@ -1678,9 +1296,9 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); - if (childNode->getType() == Quantize) { + if (childNode->getType() == FakeQuantize) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); @@ -1694,6 +1312,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { } else if (childNode->getType() == Eltwise) { auto childs = childNode->childEdges; auto parents = childNode->parentEdges; + auto initialParentInNum = parentNode->getParentEdges().size(); for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); @@ -1732,20 +1351,25 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { } } else { MKLDNNEdgePtr &remEdge = p_edge; + auto parentEltwise = parentNode; int inNum = 0; + int outNum = parentNode->getParentEdges().size(); if (remEdge) { inNum = remEdge->getInputNum(); + // Need to keep order for MulAdd + if (childNode->getAlgorithm() == EltwiseMulAdd) { + outNum = initialParentInNum + remEdge->getOutputNum() - 1; + } remEdge->drop(); removeEdge(graph, remEdge); } - auto parentEltwise = parentNode; - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentNode, inNum, outNum)); auto &graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentEltwise->inDims.push_back(parent->outDims[0]); + parentNode->inDims.push_back(parent->outDims[0]); } } @@ -1756,35 +1380,6 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) { - for (MKLDNNNodePtr& node : graph.GetNodes()) { - bool toDrop = false; - - if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode->getOpType() == PowerStatic) { - PowerLayer *l = dynamic_cast(node->getCnnLayer().get()); - if (l == nullptr) - IE_THROW() << "Cannot get power layer " << node->getName(); - - if (l->power == 1.0f && l->scale == 1.0f && l->offset == 0.0f) toDrop = true; - } - } - - if (node->getType() == Eltwise && node->getCnnLayer()->type == "ScaleShift") { - ScaleShiftLayer* l = dynamic_cast(node->getCnnLayer().get()); - if (l == nullptr) - IE_THROW() << "Cannot get scale shift layer " << node->getName(); - - if (l->_weights == nullptr && l->_biases == nullptr) toDrop = true; - } - - if (node->getType() == Copy) toDrop = true; - - if (toDrop) graph.DropNode(node); - } -} - void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { std::set processed; int graphNodesSize = graph.GetNodes().size(); @@ -1837,107 +1432,6 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) { - for (auto input : graph.GetNodes()) { - if (input->getType() != Input) { - continue; - } - - auto inTD = input->getCnnLayer().get()->outData[0]->getTensorDesc(); - for (size_t i = 0; i < input->getChildEdges().size(); i++) { - auto inputEdge = input->getChildEdgeAt(i); - auto convert = 
inputEdge->getChild(); - if (convert->getType() == Convert) { - for (int j = 0; j < convert->getChildEdges().size(); j++) { - auto convertEdge = convert->getChildEdgeAt(j); - auto reorder = convertEdge->getChild(); - if (reorder->getType() == Reorder) { - MKLDNNReorderNode* rn = dynamic_cast(reorder.get()); - auto rnOutput = rn->getOutput(); - if (inTD.getPrecision() == rnOutput.getPrecision() && - inTD.getLayout() == rnOutput.getLayout() && - inTD.getDims() == rnOutput.getDims()) { - /** - * TODO: just drop extra nodes instead of moving edges - * graph.DropNode(convert); - * graph.DropNode(reorder); - */ - auto avterReorder = reorder->getChildEdgeAt(0)->getChild(); - auto oldEdgeNum = reorder->getChildEdgeAt(0)->getOutputNum(); - reorder->getChildEdgeAt(0)->drop(); - convertEdge->drop(); - - MKLDNNEdgePtr newEdge(new MKLDNNEdge(input, avterReorder, i, oldEdgeNum)); - graph.GetEdges().push_back(newEdge); - input->addEdge(newEdge); - j--; - } - } - } - } - } - } -} - -void MKLDNNGraphOptimizer::RemoveIOScaleShifts(MKLDNNGraph &graph) { - for (MKLDNNNodePtr& node : graph.GetNodes()) { - if (node->getType() == Eltwise && node->getCnnLayer()->type == "ScaleShift") { - ScaleShiftLayer* l = dynamic_cast(node->getCnnLayer().get()); - if (l == nullptr) - IE_THROW() << "Cannot get scale shift layer " << node->getName(); - - auto cur = l->insData[0].lock(); - if (cur == nullptr) { - IE_THROW() << "[MKLDNN] error - invalid input data"; - } - if (cur->getTensorDesc().getPrecision() != l->outData[0]->getTensorDesc().getPrecision()) { - if (node->name.find("_iScaleShift_") != std::string::npos) { - auto child = node->childEdges[0].lock()->getChild(); - if (child->type == Reorder) { - MKLDNNReorderNode* rn = dynamic_cast(child.get()); - if (rn != nullptr) { - rn->_scales = l->_weights; - graph.DropNode(node); - } - } else { - IE_THROW() << "Strange case. No Reorder after iScaleShift"; - } - } else if (node->name.find("_oScaleShift_") != std::string::npos) { - auto parent = node->parentEdges[0].lock()->getParent(); - - if (parent->type == Reorder) { - MKLDNNReorderNode* rn = dynamic_cast(parent.get()); - if (rn != nullptr) { - rn->_scales = l->_weights; - graph.DropNode(node); - } - } else { - IE_THROW() << "Strange case. 
No Reorder before oScaleShift"; - } - } - } - } - } -} - -bool MKLDNNGraphOptimizer::IsOneOf(Type type, std::vector types) { - for (auto tp : types) { - if (type == tp) { - return true; - } - } - return false; -} - -bool MKLDNNGraphOptimizer::IsOneOf(EltwiseOpType alg, std::vector algs) { - for (auto a : algs) { - if (alg == a) { - return true; - } - } - return false; -} - void MKLDNNGraphOptimizer::removeEdge(MKLDNNGraph &graph, MKLDNNEdgePtr& edge) { auto& edges = graph.GetEdges(); for (auto it = edges.begin(); it != edges.end(); it++) { @@ -1978,48 +1472,28 @@ void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::FuseClampAndQuantize(MKLDNNGraph &graph) { +void MKLDNNGraphOptimizer::FuseClampAndFakeQuantize(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableClampNode = [](MKLDNNNodePtr node) { - if (node->getType() != Eltwise) - return false; - - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to Eltwise node"; - - if (eltwiseNode->getChildEdges().size() != 1) - return false; - - if (eltwiseNode->getOpType() != Clamp) - return false; - - return true; + return node->getType() == Eltwise && node->getChildEdges().size() == 1 && node->getAlgorithm() == EltwiseClamp; }; - auto isSutableQuantizeNode = [](MKLDNNNodePtr node) { - if (node->getType() != Quantize) - return false; - - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to Quantize node"; - - return !quantizeNode->isBinarization(); + auto isSutableFakeQuantizeNode = [](MKLDNNNodePtr node) { + return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; }; - auto fuseClampAndQuantizeNodes = [](MKLDNNNodePtr parent, MKLDNNNodePtr child) { + auto fuseClampAndFakeQuantizeNodes = [](MKLDNNNodePtr parent, MKLDNNNodePtr child) { auto* eltwiseNode = dynamic_cast(parent.get()); if (eltwiseNode == nullptr) IE_THROW() << "Cannot cast " << parent->getName() << " to Eltwise node"; - auto* quantizeNode = dynamic_cast(child.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot cast " << child->getName() << " to Quantize node"; + auto* fakeQuantizeNode = dynamic_cast(child.get()); + if (fakeQuantizeNode == nullptr) + IE_THROW() << "Cannot cast " << child->getName() << " to FakeQuantize node"; - const std::vector& cropLowData = quantizeNode->getCropLow(); - const std::vector& cropHighData = quantizeNode->getCropHigh(); + const std::vector& cropLowData = fakeQuantizeNode->getCropLow(); + const std::vector& cropHighData = fakeQuantizeNode->getCropHigh(); std::vector newCropLow(cropLowData.size()); std::vector newCropHigh(cropHighData.size()); @@ -2028,8 +1502,8 @@ void MKLDNNGraphOptimizer::FuseClampAndQuantize(MKLDNNGraph &graph) { for (int i = 0; i < cropHighData.size(); i++) newCropHigh[i] = std::min(cropHighData[i], eltwiseNode->getBeta()); - quantizeNode->setCropLow(newCropLow); - quantizeNode->setCropHigh(newCropHigh); + fakeQuantizeNode->setCropLow(newCropLow); + fakeQuantizeNode->setCropHigh(newCropHigh); return true; }; @@ -2039,80 +1513,60 @@ void MKLDNNGraphOptimizer::FuseClampAndQuantize(MKLDNNGraph &graph) { if (!isSutableClampNode(parent)) continue; auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableQuantizeNode(child)) continue; + if (!isSutableFakeQuantizeNode(child)) continue; - if (fuseClampAndQuantizeNodes(parent, child)) { + if 
(fuseClampAndFakeQuantizeNodes(parent, child)) { graph.DropNode(parent); } } } -void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) { +void MKLDNNGraphOptimizer::FuseMulAddAndFakeQuantize(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableScaleShiftNode = [](MKLDNNNodePtr node) { - if (node->getType() != Eltwise) - return false; - - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to eltwise node"; - - if (eltwiseNode->getChildEdges().size() != 1) - return false; - - if (eltwiseNode->getOpType() != MulAdd) - return false; - - return true; + return node->getType() == Eltwise && node->getChildEdges().size() == 1 && node->getAlgorithm() == EltwiseMulAdd && node->canBePerformedAsScaleShift(); }; - auto isSutableQuantizeNode = [](MKLDNNNodePtr node) { - if (node->getType() != Quantize) - return false; - - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to Quantize node"; - - return !quantizeNode->isBinarization(); + auto isSutableFakeQuantizeNode = [](MKLDNNNodePtr node) { + return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; }; - auto fuseScaleShiftAndQuantizeNodes = [](MKLDNNNodePtr parent, MKLDNNNodePtr child) { - auto* eltwiseNode = dynamic_cast(parent.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot cast " << parent->getName() << " to eltwise node"; + auto fuseScaleShiftAndFakeQuantizeNodes = [](MKLDNNNodePtr parent, MKLDNNNodePtr child) { + auto fakeQuantizeNode = std::dynamic_pointer_cast(child); + if (fakeQuantizeNode == nullptr) + IE_THROW() << "Cannot cast " << child->getName() << " to FakeQuantize node"; - auto eltwiseLayer = eltwiseNode->getCnnLayer(); - if (eltwiseLayer == nullptr) - IE_THROW() << "Cannot get scale shift layer " << eltwiseNode->getName(); - - auto* quantizeNode = dynamic_cast(child.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot cast " << child->getName() << " to Quantize node"; - - Blob::Ptr scalesBlob = eltwiseLayer->blobs["weights"]; - if (scalesBlob == nullptr) - return false; - - Blob::Ptr shiftsBlob = eltwiseLayer->blobs["biases"]; - if (shiftsBlob == nullptr) - return false; - - const float* scalesBufferPtr = scalesBlob->buffer().as(); - const float* shiftsBufferPtr = shiftsBlob->buffer().as(); + auto scalesBlob = std::dynamic_pointer_cast(parent->getParentEdgesAtPort(1)[0]->getParent())->getConstBlob(); + auto shiftsBlob = std::dynamic_pointer_cast(parent->getParentEdgesAtPort(2)[0]->getParent())->getConstBlob(); if (scalesBlob->size() != shiftsBlob->size()) return false; + std::vector scalesBuffer; + const float* scalesBufferPtr = scalesBlob->cbuffer().as(); + std::vector shiftsBuffer; + const float* shiftsBufferPtr = shiftsBlob->cbuffer().as(); + + if (scalesBlob->getTensorDesc().getPrecision() != Precision::FP32) { + scalesBuffer.resize(scalesBlob->size()); + cpu_convert(scalesBufferPtr, &scalesBuffer[0], scalesBlob->getTensorDesc().getPrecision(), Precision::FP32, scalesBlob->size()); + scalesBufferPtr = &scalesBuffer[0]; + } for (int i = 0; i < scalesBlob->size(); i++) if (scalesBufferPtr[i] == 0.f) return false; - const std::vector& cropLowData = quantizeNode->getCropLow(); - const std::vector& cropHighData = quantizeNode->getCropHigh(); - const std::vector& inputScaleData = quantizeNode->getInputScale(); - const std::vector& inputShiftData = quantizeNode->getInputShift(); + if 
(shiftsBlob->getTensorDesc().getPrecision() != Precision::FP32) { + shiftsBuffer.resize(shiftsBlob->size()); + cpu_convert(shiftsBufferPtr, &shiftsBuffer[0], shiftsBlob->getTensorDesc().getPrecision(), Precision::FP32, shiftsBlob->size()); + shiftsBufferPtr = &shiftsBuffer[0]; + } + + const std::vector& cropLowData = fakeQuantizeNode->getCropLow(); + const std::vector& cropHighData = fakeQuantizeNode->getCropHigh(); + const std::vector& inputScaleData = fakeQuantizeNode->getInputScale(); + const std::vector& inputShiftData = fakeQuantizeNode->getInputShift(); std::vector newCropLow(scalesBlob->size()); std::vector newCropHigh(scalesBlob->size()); @@ -2166,10 +1620,10 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) { } } - quantizeNode->setCropLow(newCropLow); - quantizeNode->setCropHigh(newCropHigh); - quantizeNode->setInputScale(newInputScale); - quantizeNode->setInputShift(newInputShift); + fakeQuantizeNode->setCropLow(newCropLow); + fakeQuantizeNode->setCropHigh(newCropHigh); + fakeQuantizeNode->setInputScale(newInputScale); + fakeQuantizeNode->setInputShift(newInputShift); return true; }; @@ -2179,13 +1633,13 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) { if (!isSutableScaleShiftNode(parent)) continue; auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableQuantizeNode(child)) continue; + if (!isSutableFakeQuantizeNode(child)) continue; - if (fuseScaleShiftAndQuantizeNodes(parent, child)) { + if (fuseScaleShiftAndFakeQuantizeNodes(parent, child)) { auto parentEdges = parent->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getCnnLayer()->type != "Const") + if (!p_edge->getParent()->isConstant()) continue; removeEdge(graph, p_edge); @@ -2196,32 +1650,32 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { +void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == Permute && node->getChildEdges().size() == 1; + return node->getType() == Transpose && node->getChildEdges().size() == 1; }; auto isSutableChildNode = [](MKLDNNNodePtr node) { return node->getType() == Reorder && node->getChildEdges().size() == 1; }; - // Method checkAscendingSummaryOrder() checks that after the sequential execution of Permute and Reorder nodes, - // the order of the elements in the memory will not change. In other words, that Permute+Reorder is identical permutation. + // Method checkAscendingSummaryOrder() checks that after the sequential execution of Transpose and Reorder nodes, + // the order of the elements in the memory will not change. In other words, that Transpose+Reorder is identical permutation. 
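A minimal sketch of the invariant described in the comment above (not part of this patch; the helper name and signature are hypothetical): the check boils down to composing the layout-aware Transpose permutation with the Reorder permutation and verifying that the composition is the identity, i.e. the element order in memory does not change.

#include <cstddef>
#include <vector>

// Assumes both arguments are valid permutations of 0..n-1, as the real
// checkAscendingSummaryOrder() assumes for its layout-aware orders.
static bool isIdentityComposition(const std::vector<std::size_t>& transposeOrder,
                                  const std::vector<std::size_t>& reorderOrder) {
    if (transposeOrder.size() != reorderOrder.size())
        return false;
    for (std::size_t i = 0; i < transposeOrder.size(); i++) {
        // summaryOrder[i] = reorderOrder[transposeOrder[i]]; an identity permutation requires == i
        if (reorderOrder[transposeOrder[i]] != i)
            return false;
    }
    return true;
}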
auto checkAscendingSummaryOrder = [](std::shared_ptr &parentNode, std::shared_ptr &childNode) -> bool { - auto* permuteNode = dynamic_cast(parentNode.get()); + auto* transposeNode = dynamic_cast(parentNode.get()); auto* reorderNode = dynamic_cast(childNode.get()); - if (!permuteNode || !reorderNode) { + if (!transposeNode || !reorderNode) { return false; } - auto& permuteOrder = permuteNode->getOrder(); - auto& layoutOrder = permuteNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); + auto& transposeOrder = transposeNode->getOrder(); + auto& layoutOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder(); auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); - if (permuteOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { + if (transposeOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { return false; } @@ -2231,10 +1685,10 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { revLayoutOrder[layoutOrder[i]] = i; } - // newPermuteOrder - Permute layout-aware permutation - auto newPermuteOrder = SizeVector(permuteOrder.size()); - for (int i = 0; i < newPermuteOrder.size(); i++) { - newPermuteOrder[i] = layoutOrder[permuteOrder[revLayoutOrder[i]]]; + // newTransposeOrder - Transpose layout-aware permutation + auto newTransposeOrder = SizeVector(transposeOrder.size()); + for (int i = 0; i < newTransposeOrder.size(); i++) { + newTransposeOrder[i] = layoutOrder[transposeOrder[revLayoutOrder[i]]]; } // reorderOrder - Reorder layout-aware permutation @@ -2248,13 +1702,13 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { } } - // summaryOrder - resulting Permute+Reorder permutation - auto summaryOrder = SizeVector(permuteOrder.size()); + // summaryOrder - resulting Transpose+Reorder permutation + auto summaryOrder = SizeVector(transposeOrder.size()); for (int i = 0; i < summaryOrder.size(); i++) { - summaryOrder[i] = reorderOrder[newPermuteOrder[i]]; + summaryOrder[i] = reorderOrder[newTransposeOrder[i]]; } - // check that Permute+Reorder is the identical permutation + // check that Transpose+Reorder is the identical permutation for (int i = 0; i < summaryOrder.size(); i++) { if (summaryOrder[i] != i) { return false; @@ -2264,22 +1718,34 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { return true; }; - // Permute and Reorder do opposite permutation to each other. + // Transpose and Reorder do opposite permutation to each other. // Example: - // chain [physical layout: NCHW, logical layout: NCHW] -> Permute(order=0312) -> [physical layout: NWCH, logical layout: NCHW] -> + // chain [physical layout: NCHW, logical layout: NCHW] -> Transpose(order=0312) -> [physical layout: NWCH, logical layout: NCHW] -> // Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true) // which will just reinterprets layout without physical change of the memory. // Two cases are possible: // 1) inPrec = outPrec - // In this case, we replace Permute+Reorder pattern with a new Reorder that does nothing. 
+ // In this case, we replace Transpose+Reorder pattern with a new Reorder that does nothing. // 2) inPrec != outPrec - // As in the first case, we also replace Permute+Reorder pattern with a new Reorder. + // As in the first case, we also replace Transpose+Reorder pattern with a new Reorder. // Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec) // to the output precision (outPrec) - auto mergePermuteAndReorder = [&](std::shared_ptr& parentNode, std::shared_ptr& childNode) { - auto parentParentNode = parentNode->getParentEdgeAt(0)->getParent(); + auto mergeTransposeAndReorder = [&](std::shared_ptr& parentNode, std::shared_ptr& childNode) { + auto parentParentNode = parentNode->getParentEdgesAtPort(0)[0]->getParent(); + auto parentParentConstNode = parentNode->getParentEdgesAtPort(1)[0]->getParent(); auto childChildNode = childNode->getChildEdgeAt(0)->getChild(); + auto &remEdge = parentParentConstNode->getChildEdgeAt(0); + remEdge->drop(); + auto& edges = graph.GetEdges(); + for (auto it = edges.begin(); it != edges.end(); it++) { + if ((*it) == remEdge) { + edges.erase(it); + parentParentConstNode->remove(); + break; + } + } + graph.DropNode(parentNode); graph.DropNode(childNode); @@ -2303,6 +1769,9 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { break; } } + if (!edge) { + IE_THROW() << "Transpose node '" << parentNode->getName() << "' has invalid edges."; + } auto reorderNode = graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true); @@ -2329,7 +1798,7 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { } if (checkAscendingSummaryOrder(parentNode, childNode)) { - mergePermuteAndReorder(parentNode, childNode); + mergeTransposeAndReorder(parentNode, childNode); } } -} \ No newline at end of file +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h index 60034d6cbec..01efb75e814 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h @@ -19,36 +19,26 @@ public: void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph); private: - void MergeGroupConvolution(MKLDNNGraph& graph); - void MergeTwoEqualScaleShifts(MKLDNNGraph& graph); - void FuseConvolutionAndActivation(MKLDNNGraph &graph); + void FuseConvolutionAndBias(MKLDNNGraph &graph); + void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph); + void FuseMultiplyAndAdd(MKLDNNGraph &graph); void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph); - void FuseConvolutionAndDepthwise(MKLDNNGraph &graph); + void FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph); void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph); void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph); - void FuseConvolutionAndQuantize(MKLDNNGraph &graph); - void FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph); - void FusePoolingAndQuantize(MKLDNNGraph &graph); - void FuseBatchNormWithScale(MKLDNNGraph& graph); + void FusePoolingAndFakeQuantize(MKLDNNGraph &graph); void FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph); void FuseMVNAndSimpleOperation(MKLDNNGraph &graph); void FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph); - void FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph); - void RemoveIdentityOperator(MKLDNNGraph& graph); + void FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph); - void 
RemoveIOScaleShifts(MKLDNNGraph& graph); void DropDoubleReorders(MKLDNNGraph& graph); - void DropConvertReorder(MKLDNNGraph& graph); - void AddConvertToReorder(MKLDNNGraph &graph); void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph); void FuseBroadcastAndEltwise(MKLDNNGraph &graph); void FuseEltwiseAndSimple(MKLDNNGraph &graph); - void FuseScaleShiftAndQuantize(MKLDNNGraph &graph); - void FuseClampAndQuantize(MKLDNNGraph &graph); - void MergePermuteAndReorder(MKLDNNGraph &graph); - - bool IsOneOf(Type type, std::vector types); - bool IsOneOf(EltwiseOpType alg, std::vector algs); + void FuseMulAddAndFakeQuantize(MKLDNNGraph &graph); + void FuseClampAndFakeQuantize(MKLDNNGraph &graph); + void MergeTransposeAndReorder(MKLDNNGraph &graph); void removeEdge(MKLDNNGraph &graph, MKLDNNEdgePtr& edge); }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 439b5c85d28..ca1dd9ed589 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -20,7 +20,8 @@ #include "nodes/common/cpu_memcpy.h" #include "mkldnn_async_infer_request.h" #include - +#include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs, @@ -103,33 +104,14 @@ void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() { IE_THROW() << "Input blobs map contains not registered during IInferencePlugin::LoadNetwork blob with name " << input.first; } auto inPrec = input.second->getTensorDesc().getPrecision(); + if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) { + inPrec = InferenceEngine::Precision::FP32; + } else { + inPrec = normalizeToSupportedPrecision(inPrec); + } - switch (inPrec) { - // these precisions are supported by mkldnn, so we push the blob directly - case InferenceEngine::Precision::I8: - case InferenceEngine::Precision::I32: - case InferenceEngine::Precision::BF16: - case InferenceEngine::Precision::FP32: { - break; - } - // these precisions are supported by mkldnn, so we push the blob directly - // BUT if a mean image exists, we convert the blob and send FP32 - case InferenceEngine::Precision::U8: - case InferenceEngine::Precision::BOOL: { - if (graph->hasMeanImageFor(input.first)) - inPrec = InferenceEngine::Precision::FP32; - break; - } - // these precisions are unsupported by mkldnn, so we convert the blob and send I32 - case InferenceEngine::Precision::U16: - case InferenceEngine::Precision::I16: - case InferenceEngine::Precision::I64: - case InferenceEngine::Precision::U64: { - inPrec = InferenceEngine::Precision::I32; - break; - } - default: - IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision(); + if (inPrec == InferenceEngine::Precision::UNSPECIFIED) { + IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision(); } // User can initialize input via setBlob API using tensorDesc with default (ANY) layout. 
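For orientation, a minimal sketch of the precision normalization that replaces the switch statement removed above (not part of this patch; the behaviour is inferred from that removed switch, and the real normalizeToSupportedPrecision() helper, presumably provided by the newly included utils/cpu_utils.hpp, may differ):

#include <ie_precision.hpp>

// Precisions mkldnn consumes directly pass through, 16/64-bit integer precisions are
// converted to I32 before pushing, and anything else is reported as UNSPECIFIED so the
// caller can raise the "Unsupported input precision" error.
static InferenceEngine::Precision normalizePrecisionSketch(InferenceEngine::Precision prec) {
    switch (prec) {
        case InferenceEngine::Precision::FP32:
        case InferenceEngine::Precision::BF16:
        case InferenceEngine::Precision::I32:
        case InferenceEngine::Precision::I8:
        case InferenceEngine::Precision::U8:
        case InferenceEngine::Precision::BOOL:
            return prec;
        case InferenceEngine::Precision::U16:
        case InferenceEngine::Precision::I16:
        case InferenceEngine::Precision::I64:
        case InferenceEngine::Precision::U64:
            return InferenceEngine::Precision::I32;
        default:
            return InferenceEngine::Precision::UNSPECIFIED;
    }
}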
@@ -246,7 +228,6 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc(); - InferenceEngine::Precision originPrecision = blobs[name]->getTensorDesc().getPrecision(); if (_networkInputs.find(name) != _networkInputs.end()) { InferenceEngine::Layout l = _networkInputs[name]->getLayout(); InferenceEngine::Precision p = _networkInputs[name]->getPrecision(); @@ -257,7 +238,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: _inputs[name] = make_blob_with_precision(desc); _inputs[name]->allocate(); - if (desc.getPrecision() == originPrecision && + if (blobs[name]->getTensorDesc() == desc && graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) { externalPtr[name] = _inputs[name]->buffer(); } @@ -274,7 +255,8 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: return data; } - InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc(); + InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc(); + desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision())); // WA: need to avoid exception thrown when we compare blocking desc in SetBlob // in situation if we push output blobs as inputs for next network (in Hetero plugin) @@ -285,7 +267,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: _outputs[name] = make_blob_with_precision(desc); _outputs[name]->allocate(); - if (desc.getPrecision() == InferenceEngine::Precision::FP32 && !graph->getProperty().batchLimit) { + if (blobs[name]->getTensorDesc() == desc && !graph->getProperty().batchLimit) { externalPtr[name] = _outputs[name]->buffer(); } data = _outputs[name]; @@ -351,7 +333,12 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In IE_THROW(ParameterMismatch) << "Failed to set input blob. Blocking descriptor mismatch."; } - if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 && + InferenceEngine::BlobMap blobs; + graph->getInputBlobs(blobs); + if (blobs.find(name) == blobs.end()) + IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; + + if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() && graph->_meanImages.find(name) == graph->_meanImages.end() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -382,7 +369,13 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In foundOutput->getTensorDesc().getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) { IE_THROW(ParameterMismatch) << "Failed to set output blob. 
Blocking descriptor mismatch."; } - if (data->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32 && + + InferenceEngine::BlobMap blobs; + graph->getOutputBlobs(blobs); + if (blobs.find(name) == blobs.end()) + IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; + + if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -398,8 +391,8 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void * void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { for (auto& it : externalPtr) { - auto input = graph->inputNodes.find(it.first); - if (input != graph->inputNodes.end()) { + auto input = graph->inputNodesMap.find(it.first); + if (input != graph->inputNodesMap.end()) { if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second) continue; // Input cannot be in-place with other primitives @@ -432,9 +425,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { } MKLDNNNodePtr output; - for (auto& out : graph->outputNodes) { - if (out->getName() == "out_" + it.first) { - output = out; + for (auto& out : graph->outputNodesMap) { + if (out.first == it.first) { + output = out.second; break; } } @@ -493,4 +486,4 @@ void MKLDNNPlugin::MKLDNNInferRequest::ThrowIfCanceled() const { if (_asyncRequest != nullptr) { _asyncRequest->ThrowIfCanceled(); } -} \ No newline at end of file +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index ce7afca2e65..4df0be7b692 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -489,8 +489,8 @@ static const std::map> form_tags_by mkldnn::memory::format_tag::aBCde4c8b2c, }}, {6, { // Popular mkldnn::memory::format_tag::abcdef, // plain - mkldnn::memory::format_tag::acbdef, // permuted - mkldnn::memory::format_tag::defcab, // permuted + mkldnn::memory::format_tag::acbdef, // permute + mkldnn::memory::format_tag::defcab, // permute mkldnn::memory::format_tag::aBcdef16b, // blocked 16c mkldnn::memory::format_tag::aBCdef16b16c, @@ -565,18 +565,46 @@ bool MKLDNNMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const { auto refStrides = refDesc.data.format_desc.blocking.strides; std::vector actualOrder(desc.data.ndims); - std::iota(actualOrder.begin(), actualOrder.end(), 0); - std::sort(actualOrder.begin(), actualOrder.end(), - [&actualStrides] (size_t ind_l, size_t ind_r) { - return actualStrides[ind_l] > actualStrides[ind_r]; - }); + { + const auto dims = desc.dims(); + std::vector total_block_per_dim(dims.size(), 1); + const auto &blk_desc = desc.data.format_desc.blocking; + for (int i = 0; i < blk_desc.inner_nblks; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + std::iota(actualOrder.begin(), actualOrder.end(), 0); + std::sort(actualOrder.begin(), actualOrder.end(), + [&actualStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (actualStrides[ind_l] > actualStrides[ind_r]) || + (actualStrides[ind_l] == actualStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + } std::vector 
refOrder(refDesc.data.ndims); - std::iota(refOrder.begin(), refOrder.end(), 0); - std::sort(refOrder.begin(), refOrder.end(), - [&refStrides] (size_t ind_l, size_t ind_r) { - return refStrides[ind_l] > refStrides[ind_r]; - }); + { + const auto dims = refDesc.dims(); + std::vector total_block_per_dim(dims.size(), 1); + const auto &blk_desc = refDesc.data.format_desc.blocking; + for (int i = 0; i < blk_desc.inner_nblks; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + std::iota(refOrder.begin(), refOrder.end(), 0); + std::sort(refOrder.begin(), refOrder.end(), + [&refStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (refStrides[ind_l] > refStrides[ind_r]) || + (refStrides[ind_l] == refStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + } if (actualOrder != refOrder) { return false; @@ -682,14 +710,6 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { const size_t inner_ndims = blk_desc.inner_nblks; const size_t total_ndims = outer_ndims + inner_ndims; - // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} - std::vector outer_order(outer_ndims); - std::iota(outer_order.begin(), outer_order.end(), 0); - std::sort(outer_order.begin(), outer_order.end(), - [&blk_desc] (size_t ind_l, size_t ind_r) { - return blk_desc.strides[ind_l] > blk_desc.strides[ind_r]; - }); - // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} std::vector inner_strides(inner_ndims, 1); for (size_t i = 1; i < blk_desc.inner_nblks; i++) { @@ -701,6 +721,19 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { for (int i = 0; i < inner_ndims; i++) { total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::iota(outer_order.begin(), outer_order.end(), 0); + std::sort(outer_order.begin(), outer_order.end(), + [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); // IE blocked order // [new_outer_order] U [inner_idxs] @@ -721,7 +754,7 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, ie_blk_dims.end() - blk_desc.inner_nblks); std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), - [&] (size_t i) { return div_up(dims[i], total_block_per_dim[i]); }); + [&] (size_t i) { return outer_block_dims[i]; }); // IE offset padded to data. Same as for oneDNN SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; @@ -742,7 +775,7 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { MKLDNNMemory::convertToIePrec(desc.data_type()), SizeVector {begin(dims), end(dims)}, ie_blk_desc }; - // TODO: BLOCKED is the most common layout which covers all other permuted layout like NHWC. 
+ // TODO: BLOCKED is the most common layout which covers all other permute layout like NHWC. // But for some cases we have to specify it more correctly.. may be.. or just keep // auto detected layout in constructor of TensorDesc. return res; @@ -809,7 +842,7 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc): is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); } - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to permute blocked dims + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims // and may be we can achieve correct "descending strides" form which allow conversion. if (!is_descending_strides) IE_THROW() << "Unsupported case for conversion"; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index f446c339d39..ca4db7fd47c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -13,12 +13,11 @@ #include #include -#include #include #include #include #include -#include +#include #include #include #include @@ -30,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -41,6 +40,8 @@ #include #include #include +#include +#include #include #include #include "mkldnn_extension_utils.h" @@ -49,6 +50,10 @@ #include "mkldnn_debug.h" #include "utils/rt_info/memory_formats_attribute.hpp" +#include +#include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" + using namespace mkldnn; using namespace MKLDNNPlugin; using namespace openvino; @@ -56,94 +61,120 @@ using namespace openvino; using namespace InferenceEngine::details; namespace MKLDNNPlugin { static const InferenceEngine::details::caseless_unordered_map type_to_name_tbl = { - { "Unknown", Unknown }, - { "Input", Input }, - { "Const", Input }, - { "Output", Output }, - { "Reorder", Reorder }, + { "Constant", Input }, + { "Parameter", Input }, + { "Result", Output }, { "Convolution", Convolution }, - { "ReLU", Eltwise }, - { "GELU", Eltwise }, - { "ELU", Eltwise }, + { "GroupConvolution", Convolution }, + { "MatMul", MatMul }, + { "FullyConnected", FullyConnected }, + { "MaxPool", Pooling }, + { "AvgPool", Pooling }, + { "Add", Eltwise }, + { "Subtract", Eltwise }, + { "Multiply", Eltwise }, + { "Divide", Eltwise }, + { "SquaredDifference", Eltwise }, + { "Maximum", Eltwise }, + { "Minimum", Eltwise }, + { "Mod", Eltwise }, + { "FloorMod", Eltwise }, + { "Power", Eltwise }, + { "PowerStatic", Eltwise }, + { "Equal", Eltwise }, + { "NotEqual", Eltwise }, + { "Greater", Eltwise }, + { "GreaterEqual", Eltwise }, + { "Less", Eltwise }, + { "LessEqual", Eltwise }, + { "LogicalAnd", Eltwise }, + { "LogicalOr", Eltwise }, + { "LogicalXor", Eltwise }, + { "LogicalNot", Eltwise }, + { "Relu", Eltwise }, + { "LeakyRelu", Eltwise }, + { "Gelu", Eltwise }, + { "Elu", Eltwise }, + { "Tanh", Eltwise }, { "Sigmoid", Eltwise }, - { "Logistic", Eltwise }, - { "TanH", Eltwise }, - { "ReLU6", Eltwise }, - { "Exp", Eltwise }, - { "Not", Eltwise }, - { "Activation", Eltwise }, + { "Abs", Eltwise }, + { "Sqrt", Eltwise }, { "Clamp", Eltwise }, - { "Swish", Eltwise }, + { "Exp", Eltwise }, + { "SwishCPU", Eltwise }, { "HSwish", Eltwise }, { "Mish", Eltwise }, { "HSigmoid", Eltwise }, { "Round", Eltwise }, - { "ScaleShift", Eltwise }, - { "PReLU", Eltwise }, + { "PRelu", Eltwise }, + { "Erf", Eltwise }, { "SoftPlus", Eltwise }, - { "Norm", Lrn }, - { "LRN", Lrn }, - { "Pooling", Pooling }, - { 
"FullyConnected", FullyConnected }, - { "InnerProduct", FullyConnected }, - { "Gemm", Gemm }, - { "Softmax", SoftMax }, - { "SoftMax", SoftMax }, - { "Split", Split }, - { "Slice", Split }, - { "Concat", Concatenation }, - { "Deconvolution", Deconvolution }, - { "Eltwise", Eltwise }, - { "Mod", Eltwise }, - { "Power", Eltwise }, { "Reshape", Reshape }, + { "Squeeze", Reshape }, + { "Unsqueeze", Reshape }, + { "Softmax", Softmax }, + { "Reorder", Reorder }, + { "BatchToSpace", BatchToSpace }, + { "SpaceToBatch", SpaceToBatch }, + { "DepthToSpace", DepthToSpace }, + { "SpaceToDepth", SpaceToDepth }, + { "Roll", Roll }, + { "LRN", Lrn }, + { "Split", Split }, + { "VariadicSplit", Split }, + { "Concat", Concatenation }, + { "ConvolutionBackpropData", Deconvolution }, + { "GroupConvolutionBackpropData", Deconvolution }, + { "StridedSlice", StridedSlice }, { "Tile", Tile }, - { "SimplerNMS", SimplerNMS }, { "ROIAlign", ROIAlign }, { "ROIPooling", ROIPooling }, - { "BatchNormalization", BatchNormalization }, - { "DepthToSpace", DepthToSpace }, - { "Flatten", Flatten }, + { "PSROIPooling", PSROIPooling }, + { "DeformablePSROIPooling", PSROIPooling }, { "Pad", Pad }, - { "Permute", Permute }, - { "SpaceToDepth", SpaceToDepth }, - { "StridedSlice", StridedSlice }, - { "Copy", Copy }, + { "Transpose", Transpose }, { "LSTMCell", RNNCell }, { "GRUCell", RNNCell }, { "RNNCell", RNNCell }, { "LSTMSequence", RNNSeq }, { "GRUSequence", RNNSeq }, { "RNNSequence", RNNSeq }, - { "Quantize", Quantize }, - { "FakeQuantize", Quantize }, + { "FakeQuantize", FakeQuantize }, { "BinaryConvolution", BinaryConvolution }, { "DeformableConvolution", DeformableConvolution }, { "TensorIterator", TensorIterator }, { "Loop", TensorIterator }, - { "MemoryInput", MemoryInput}, // for construction from name ctor, arbitrary name is used - { "Memory", MemoryOutput }, // for construction from layer ctor + { "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used + { "Assign", MemoryOutput }, // for construction from layer ctor { "Convert", Convert }, { "MVN", MVN}, - { "Normalize", Normalize}, + { "NormalizeL2", NormalizeL2}, { "ScatterUpdate", ScatterUpdate}, { "ScatterElementsUpdate", ScatterElementsUpdate}, { "ScatterNDUpdate", ScatterNDUpdate}, { "Interpolate", Interpolate}, - { "ReduceAnd", ReduceAnd}, - { "ReduceL1", ReduceL1}, - { "ReduceL2", ReduceL2}, - { "ReduceLogSum", ReduceLogSum}, - { "ReduceLogSumExp", ReduceLogSumExp}, - { "ReduceMax", ReduceMax}, - { "ReduceMean", ReduceMean}, - { "ReduceMin", ReduceMin}, - { "ReduceOr", ReduceOr}, - { "ReduceProd", ReduceProd}, - { "ReduceSum", ReduceSum}, - { "ReduceSumSquare", ReduceSumSquare}, - { "Erf", Eltwise }, - { "Roll", Roll }, + { "ReduceL1", Reduce}, + { "ReduceL2", Reduce}, + { "ReduceLogicalAnd", Reduce}, + { "ReduceLogicalOr", Reduce}, + { "ReduceMax", Reduce}, + { "ReduceMean", Reduce}, + { "ReduceMin", Reduce}, + { "ReduceProd", Reduce}, + { "ReduceSum", Reduce}, + { "ReduceLogSum", Reduce}, + { "ReduceLogSumExp", Reduce}, + { "ReduceSumSquare", Reduce}, + { "Broadcast", Broadcast}, + { "EmbeddingSegmentsSum", EmbeddingSegmentsSum}, + { "EmbeddingBagPackedSum", EmbeddingBagPackedSum}, + { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum}, + { "Gather", Gather}, + { "GatherElements", GatherElements}, + { "GatherND", GatherND}, + { "OneHot", OneHot}, + { "RegionYolo", RegionYolo}, + { "Select", Select} }; Type TypeFromName(const std::string type) { @@ -162,44 +193,65 @@ MKLDNNNode::NodesFactory & MKLDNNNode::factory() { return 
factoryInstance; } -MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &w_cache) +MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown), - weightCache(w_cache), cnnLayer(layer), engine(eng), name(layer->name), typeStr(layer->type), - type(TypeFromName(layer->type)), profiling(layer->name) { - if (!layer->outData.empty()) { - for (const auto& outData : layer->outData) { - outDims.emplace_back(outData->getDims()); + weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()), + type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { + algorithm = Algorithm::Undefined; + fusingPort = -1; + + const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name(); + for (size_t i = 0; i < op->get_input_size(); i++) { + if (op->get_input_partial_shape(i).is_dynamic()) + IE_THROW() << errorPrefix << " has dynamic input shape on " << i << " port, but CPU plug-in supports only static shape"; + } + for (size_t i = 0; i < op->get_output_size(); i++) { + if (op->get_output_partial_shape(i).is_dynamic()) + IE_THROW() << errorPrefix << " has dynamic output shape on " << i << " port, but CPU plug-in supports only static shape"; + } + + for (size_t i = 0; i < op->get_input_size(); i++) { + const auto &shape = op->get_input_shape(i); + inDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape); + originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i))); + } + + if (typeStr != "Result" && typeStr != "Assign") { + if (op->get_output_size() == 0) { + IE_THROW() << "Node with type '" << typeStr << "' and name '" << name << "' does not have any outputs."; } - } else { - if (!(CaselessEq()(layer->type, "memory") || - CaselessEq()(layer->type, "memoryinput") || - CaselessEq()(layer->type, "output") || - CaselessEq()(layer->type, "reorder") || - CaselessEq()(layer->type, "convert"))) { - IE_THROW() << "Inappropriate layer type: " << layer->type << " name: " << layer->name; + for (size_t i = 0; i < op->get_output_size(); i++) { + const auto &shape = op->get_output_shape(i); + outDims.emplace_back(ngraph::is_scalar(shape) ? 
ngraph::Shape{1} : shape); + originalOutputPrecisions.emplace_back(details::convertPrecision(op->get_output_element_type(i))); } } - for (const auto& inData : layer->insData) { - inDims.emplace_back(inData.lock()->getDims()); + const auto& rtInfo = op->get_rt_info(); + if (rtInfo.count("originalLayersNames")) { + originalLayers = getRTInfoValue(rtInfo, "originalLayersNames"); } - if (layer->params.find("PrimitivesPriority") != layer->params.end()) { - std::istringstream stream(layer->params["PrimitivesPriority"]); + + if (originalLayers.empty()) { + addOriginalLayer(name); + } + + auto primitivesPriority = getPrimitivesPriorityValue(op); + if (!primitivesPriority.empty()) { + std::istringstream stream(primitivesPriority); std::string str; while (getline(stream, str, ',')) { if (str.substr(0, 4) != "cpu:") continue; implPriorities.push_back(parse_impl_name(str)); if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown && - str != "cpu:unknown") + str != "cpu:unknown") IE_THROW() << "Unsupported CPU implementation " << str << " for node " << getName(); } } - auto ngraphNode = layer->getNode(); - if (ngraphNode != nullptr) { - std::string inputMemoryFormats = ngraph::getMLKDNNInputMemoryFormats(ngraphNode); + if (op != nullptr) { + std::string inputMemoryFormats = ngraph::getMLKDNNInputMemoryFormats(op); if (!inputMemoryFormats.empty()) { std::istringstream stream(inputMemoryFormats); std::string str; @@ -210,7 +262,7 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn:: } } - std::string outputMemoryFormats = ngraph::getMLKDNNOutputMemoryFormats(ngraphNode); + std::string outputMemoryFormats = ngraph::getMLKDNNOutputMemoryFormats(op); if (!outputMemoryFormats.empty()) { std::istringstream stream(outputMemoryFormats); std::string str; @@ -223,6 +275,13 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn:: } } +MKLDNNNode::MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) + : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown), + weightCache(w_cache), engine(eng), name(name), typeStr(type), + type(TypeFromName(type)), profiling(name) { + // TODO [NM]: What about filling inDims and outDims? +} + void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) { auto edgePtr = edge.lock(); if (!edgePtr) @@ -669,67 +728,6 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { selectedPD->getConfig() = rightConfig; } -InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeVector dims, bool weights, bool isGrouped) { - auto checkSize = [](size_t dst_size, size_t src_size) { - if (dst_size < src_size) { - IE_THROW() << "Cannot create internal buffer. Buffer can be overrun."; - } - }; - auto * wLayer = dynamic_cast(getCnnLayer().get()); - if (wLayer == nullptr) - IE_THROW() << "Cannot get weightable layer for node " << getName() << "."; - - InferenceEngine::Blob::Ptr blb = weights ? 
wLayer->_weights : wLayer->_biases; - - if (blb == nullptr) - IE_THROW() << "Cannot get internal blob layer for node " << getName() << "."; - - auto intLayout = getWeightsLayoutByDims(dims, isGrouped); - - InferenceEngine::TensorDesc desc(blb->getTensorDesc().getPrecision(), dims, intLayout); - - auto fillInternalBlob = [&](char *data, size_t intBuffSize) { - size_t offset = blb->byteSize(); - checkSize(intBuffSize, offset); - cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize()); - data += blb->byteSize(); - for (const auto &merged : getMergeWith()) { - wLayer = dynamic_cast(merged->getCnnLayer().get()); - if (wLayer == nullptr) - IE_THROW() << "Cannot convert merged weightable layer for node " - << getName() << "."; - blb = weights ? wLayer->_weights : wLayer->_biases; - - if (blb == nullptr) - IE_THROW() << "Cannot get internal blob layer for node " << getName() << "."; - offset += blb->byteSize(); - checkSize(intBuffSize, offset); - cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize()); - data += blb->byteSize(); - } - }; - - Blob::Ptr internalBlob; - if (blb->getTensorDesc().getPrecision() == Precision::BIN) { - internalBlob = InferenceEngine::make_shared_blob(desc); - } else if (blb->getTensorDesc().getPrecision() == Precision::I8) { - internalBlob = InferenceEngine::make_shared_blob(desc); - } else if (blb->getTensorDesc().getPrecision() == Precision::I32) { - internalBlob = InferenceEngine::make_shared_blob(desc); - } else if (blb->getTensorDesc().getPrecision() == Precision::BF16) { - internalBlob = InferenceEngine::make_shared_blob(desc); - } else { - internalBlob = InferenceEngine::make_shared_blob(desc); - } - internalBlob->allocate(); - char *data = internalBlob->buffer(); - size_t intBuffSize = internalBlob->byteSize(); - - fillInternalBlob(data, intBuffSize); - - return internalBlob; -} - void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) { for (size_t i = 0; i < getChildEdges().size(); i++) { auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); @@ -837,18 +835,17 @@ MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vectorname; + originalLayers = layerName; } else { - originalLayers += "," + layer->name; + originalLayers += "," + layerName; } } void MKLDNNNode::cleanup() { internalBlobs.clear(); - cnnLayer.reset(); for (auto it : fusedWith) { it->cleanup(); @@ -1185,18 +1182,54 @@ InferenceEngine::Precision MKLDNNNode::getRuntimePrecision() const { return runtimePrecision; } -MKLDNNNode* MKLDNNNode::NodesFactory::create(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, +MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr& op, const mkldnn::engine& eng, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) { MKLDNNNode *newNode = nullptr; - - std::unique_ptr ol(createNodeIfRegistered(MKLDNNPlugin, Generic, layer, eng, w_cache)); - if (ol != nullptr && ol->created(extMgr)) - newNode = ol.release(); - - if (newNode == nullptr) { - std::unique_ptr ol(createNodeIfRegistered(MKLDNNPlugin, TypeFromName(layer->type), layer, eng, w_cache)); + std::string errorMessage; + try { + std::unique_ptr ol(createNodeIfRegistered(MKLDNNPlugin, Generic, op, eng, w_cache)); if (ol != nullptr && ol->created(extMgr)) newNode = ol.release(); + } catch (const InferenceEngine::Exception& ex) { + IE_SUPPRESS_DEPRECATED_START + if (ex.getStatus() != NOT_IMPLEMENTED) { + throw; + } else { + errorMessage += getExceptionDescWithoutStatus(ex); + } + 
IE_SUPPRESS_DEPRECATED_END + } + + if (newNode == nullptr) { + try { + std::unique_ptr ol(createNodeIfRegistered(MKLDNNPlugin, TypeFromName(op->get_type_name()), op, eng, w_cache)); + if (ol != nullptr && ol->created(extMgr)) + newNode = ol.release(); + } catch (const InferenceEngine::Exception& ex) { + IE_SUPPRESS_DEPRECATED_START + if (ex.getStatus() != NOT_IMPLEMENTED) { + throw; + } else { + errorMessage += getExceptionDescWithoutStatus(ex); + } + IE_SUPPRESS_DEPRECATED_END + } + } + + if (newNode == nullptr) { + try { + std::unique_ptr ol(new MKLDNNReferenceNode(op, eng, w_cache, errorMessage)); + if (ol != nullptr && ol->created(extMgr)) + newNode = ol.release(); + } catch (const InferenceEngine::Exception& ex) { + IE_SUPPRESS_DEPRECATED_START + if (ex.getStatus() != NOT_IMPLEMENTED) { + throw; + } else { + errorMessage += getExceptionDescWithoutStatus(ex); + } + IE_SUPPRESS_DEPRECATED_END + } } // WA-start : TI node requires all attributes to construct internal subgpath @@ -1206,8 +1239,75 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const InferenceEngine::CNNLayerPtr& ti->setExtManager(extMgr); // WA-end - if (!newNode) - IE_THROW() << "Unsupported primitive of type: " << layer->type << " name: " << layer->name; + if (!newNode) { + std::string errorDetails; + if (!errorMessage.empty()) { + errorDetails = "\nDetails: \n" + errorMessage; + } + IE_THROW() << "Unsupported operation of type: " << op->get_type_name() << " name: " << op->get_friendly_name() << errorDetails; + } return newNode; } + +bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const { + size_t fusingPort = 0; + for (size_t i = (parentNode == nullptr ? 1 : 0); i < getParentEdges().size(); i++) { + MKLDNNNode *node = getParentEdgeAt(i)->getParent().get(); + if (node == nullptr) { + IE_THROW() << "Cannot get parent node for " << getName() << " on " << i << " port"; + } + if (node == parentNode) { + fusingPort = i; + continue; + } + if (!node->isConstant() || node->getType() != Input) { + return false; + } + } + + const auto isBroadcastableToDataInput = [&]() { + const auto dataShape = getParentEdgeAt(fusingPort)->getDims().ToSizeVector(); + for (size_t i = 0; i < getParentEdges().size(); i++) { + if (i == fusingPort) + continue; + auto weightShape = getParentEdgeAt(i)->getDims().ToSizeVector(); + // [NM] TODO: PRelu is not broadcastable + // WA: [1,32,46,46], [32] -> [1,32,46,46], [1, 32, 1, 1] + if (getAlgorithm() == EltwisePrelu && weightShape.size() == 1 && weightShape.back() != 1) { + auto newWeightShape = std::vector(dataShape.size(), 1); + newWeightShape[1] = weightShape[0]; + weightShape = newWeightShape; + } + if (!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape)) + return false; + } + return true; + }; + + const auto isConvertablePowerStatic = [&]() { + if (getAlgorithm() == EltwisePowerStatic) { + const auto eltwise = dynamic_cast(this); + if (!eltwise) { + IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode"; + } + return eltwise->getAlpha() == 1.0f; + } + return false; + }; + + return (one_of(getAlgorithm(), EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu, EltwiseMulAdd) && isBroadcastableToDataInput()) + || isConvertablePowerStatic(); +} + +bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const { + if (node->getType() == FakeQuantize) { + return node->getAlgorithm() != FQBinarization; + } else if (node->getType() == Eltwise) { + return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, 
EltwiseSigmoid, EltwiseClamp, EltwiseTanh, + EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, + EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) || + node->canBePerformedAsScaleShift(this); + } + return false; +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 1f598476081..5653bb36c91 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -11,7 +11,6 @@ #include #include #include -#include #include "mkldnn_dims.h" #include "mkldnn_memory.h" #include "mkldnn_edge.h" @@ -23,13 +22,19 @@ #include "mkldnn_weights_cache.hpp" #include "mkldnn.hpp" #include +#include "utils/ngraph_utils.hpp" +#include #include +#include +#include +#include "cpu_types.h" namespace MKLDNNPlugin { using MKLDNNNodePtr = std::shared_ptr; using MKLDNNNodeWeakPtr = std::weak_ptr; +// TODO [NM]: move into separate header enum Type { Unknown, Generic, @@ -38,57 +43,54 @@ enum Type { Output, Convolution, Deconvolution, - Activation, - Depthwise, Lrn, Pooling, FullyConnected, - SoftMax, + Softmax, Split, Concatenation, Eltwise, - Gemm, + MatMul, Reshape, Tile, - SimplerNMS, ROIAlign, ROIPooling, - BatchNormalization, + PSROIPooling, + BatchToSpace, DepthToSpace, - Flatten, Pad, - Permute, + Transpose, + SpaceToBatch, SpaceToDepth, StridedSlice, - Copy, MemoryOutput, MemoryInput, RNNCell, RNNSeq, - Quantize, + FakeQuantize, BinaryConvolution, DeformableConvolution, TensorIterator, Convert, MVN, - Normalize, + NormalizeL2, ScatterUpdate, ScatterElementsUpdate, ScatterNDUpdate, Interpolate, - ReduceAnd, - ReduceL1, - ReduceL2, - ReduceLogSum, - ReduceLogSumExp, - ReduceMax, - ReduceMean, - ReduceMin, - ReduceOr, - ReduceProd, - ReduceSum, - ReduceSumSquare, - Roll + Reduce, + Broadcast, + EmbeddingSegmentsSum, + EmbeddingBagPackedSum, + EmbeddingBagOffsetsSum, + Gather, + GatherElements, + GatherND, + OneHot, + RegionYolo, + Select, + Roll, + Reference, }; Type TypeFromName(const std::string type); @@ -107,50 +109,44 @@ static std::string NameFromType(Type type) { return "Convolution"; case Deconvolution: return "Deconvolution"; - case Activation: - return "Activation"; case Lrn: return "Lrn"; case Pooling: return "Pooling"; case FullyConnected: return "FullyConnected"; - case Gemm: - return "Gemm"; - case SoftMax: - return "SoftMax"; + case MatMul: + return "MatMul"; + case Softmax: + return "Softmax"; case Split: return "Split"; case Concatenation: return "Concatenation"; - case Depthwise: - return "Depthwise"; + case StridedSlice: + return "StridedSlice"; case Reshape: return "Reshape"; case Tile: return "Tile"; - case SimplerNMS: - return "SimplerNMS"; case ROIAlign: return "ROIAlign"; case ROIPooling: return "ROIPooling"; - case BatchNormalization: - return "BatchNormalization"; + case PSROIPooling: + return "PSROIPooling"; case DepthToSpace: return "DepthToSpace"; - case Flatten: - return "Flatten"; + case BatchToSpace: + return "BatchToSpace"; case Pad: return "Pad"; - case Permute: - return "Permute"; + case Transpose: + return "Transpose"; case SpaceToDepth: return "SpaceToDepth"; - case StridedSlice: - return "StridedSlice"; - case Copy: - return "Copy"; + case SpaceToBatch: + return "SpaceToBatch"; case MemoryOutput: return "MemoryOutput"; case MemoryInput: @@ -161,8 +157,8 @@ static std::string NameFromType(Type type) { return "RNNCell"; case Eltwise: return "Eltwise"; - case Quantize: - return "Quantize"; + case 
FakeQuantize: + return "FakeQuantize"; case BinaryConvolution: return "BinaryConvolution"; case DeformableConvolution: @@ -173,8 +169,8 @@ static std::string NameFromType(Type type) { return "TensorIterator"; case Convert: return "Convert"; - case Normalize: - return "Normalize"; + case NormalizeL2: + return "NormalizeL2"; case ScatterUpdate: return "ScatterUpdate"; case ScatterElementsUpdate: @@ -183,30 +179,28 @@ static std::string NameFromType(Type type) { return "ScatterNDUpdate"; case Interpolate: return "Interpolate"; - case ReduceAnd: - return "ReduceAnd"; - case ReduceL1: - return "ReduceL1"; - case ReduceL2: - return "ReduceL2"; - case ReduceLogSum: - return "ReduceLogSum"; - case ReduceLogSumExp: - return "ReduceLogSumExp"; - case ReduceMax: - return "ReduceMax"; - case ReduceMean: - return "ReduceMean"; - case ReduceMin: - return "ReduceMin"; - case ReduceOr: - return "ReduceOr"; - case ReduceProd: - return "ReduceProd"; - case ReduceSum: - return "ReduceSum"; - case ReduceSumSquare: - return "ReduceSumSquare"; + case Reduce: + return "Reduce"; + case Broadcast: + return "Broadcast"; + case EmbeddingSegmentsSum: + return "EmbeddingSegmentsSum"; + case EmbeddingBagPackedSum: + return "EmbeddingBagPackedSum"; + case EmbeddingBagOffsetsSum: + return "EmbeddingBagPackedSum"; + case Gather: + return "Gather"; + case GatherElements: + return "GatherElements"; + case GatherND: + return "GatherND"; + case OneHot: + return "OneHot"; + case RegionYolo: + return "RegionYolo"; + case Select: + return "Select"; case Roll: return "Roll"; default: @@ -269,6 +263,31 @@ private: std::vector outputLayouts; }; +class DataConfigurator { +public: + DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc, const InferenceEngine::SizeVector& shape, + bool constant = false, int inplace = -1) : + tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape(shape), constant(constant), inplace(inplace) {} + + DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED, + bool constant = false, int inplace = -1) : + tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape({}), constant(constant), inplace(inplace) {} + + const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator; + const InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED; + const InferenceEngine::SizeVector shape; + const bool constant = false; + const int inplace = -1; +private: + static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) { + auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); + if (creators.find(tensorDescType) == creators.end()) { + IE_THROW() << "Cannot find tensor descriptor creator"; + } + return creators.at(tensorDescType); + } +}; + class MKLDNNNode : public InferenceEngine::details::no_copy { public: template @@ -343,8 +362,35 @@ public: bool isFusedWith(Type type) const; - void fuseWith(const MKLDNNNodePtr &fuse) { - fusedWith.push_back(fuse); + void addFusedNode(const MKLDNNNodePtr &fusingNode) { + fusedWith.push_back(fusingNode); + } + + virtual void fuseInto(MKLDNNNodePtr& parentNode) { + // The graph supports fusing only of consecutive nodes and some graph logic requires to know through which input port a node was fused into parent one. 
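+ // Determine the fusing port: first look for a direct edge to parentNode; if none is found
+ // (the node is being fused into an already fused chain), fall back to the last node in
+ // parentNode's fusedWith list.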
+ for (int i = 0; i < getParentEdges().size(); i++) { + if (getParentEdgesAtPort(i)[0]->getParent().get() == parentNode.get()) { + setFusingPort(i); + break; + } + } + + auto parentFusedNodes = parentNode->getFusedWith(); + if (getFusingPort() < 0 && !parentFusedNodes.empty()) { + for (int i = 0; i < getParentEdges().size(); i++) { + if (getParentEdgesAtPort(i)[0]->getParent().get() == parentFusedNodes[parentFusedNodes.size() - 1].get()) { + setFusingPort(i); + break; + } + } + } + + if (getFusingPort() == -1) { + IE_THROW() << "Cannot determine fusing port between nodes: " << parentNode->getName() << " and " << getName(); + } + + parentNode->addFusedNode(getParentEdgesAtPort(getFusingPort())[0]->getChild()); + parentNode->addOriginalLayer(getOriginalLayers()); } void clearFusedWith() { @@ -355,8 +401,6 @@ public: mergedWith.push_back(merge); } - void addOriginalLayer(const InferenceEngine::CNNLayerPtr &layer); - const std::vector &getMergeWith() { return mergedWith; } @@ -365,10 +409,20 @@ public: return fusedWith; } + int getFusingPort() const { + return fusingPort; + } + + void setFusingPort(int fusingPort) { + this->fusingPort = fusingPort; + } + const std::string getName() const { return name; } + void addOriginalLayer(const std::string& layerName); + const std::string getOriginalLayers() const { return originalLayers; } @@ -377,10 +431,6 @@ public: return type; } - const InferenceEngine::CNNLayerPtr &getCnnLayer() const { - return cnnLayer; - } - const std::vector& getSupportedPrimitiveDescriptors() const { return supportedPrimitiveDescriptors; } @@ -493,15 +543,6 @@ public: IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - static void invertVectorCopyUtoI(const InferenceEngine::PropertyVector& src, std::vector& dst) { - dst.clear(); - for (int i = 1; i <= src.size(); i++) { - dst.push_back(static_cast(src[src.size() - i])); - } - } - - std::vector inDims; - int getExecIndex() const { return execIndex; } @@ -510,6 +551,10 @@ public: return typeStr; } + void setTypeStr(const std::string &typeStr) { + this->typeStr = typeStr; + } + virtual size_t descInputNumbers(MKLDNNDescriptor desc) { return desc.inputNumbers(); } @@ -532,9 +577,72 @@ public: */ virtual InferenceEngine::Precision getRuntimePrecision() const; + const std::vector& getOriginalInputPrecisions() const { + return originalInputPrecisions; + } + const std::vector& getOriginalOutputPrecisions() const { + return originalOutputPrecisions; + } + + InferenceEngine::Precision getOriginalInputPrecisionAtPort(size_t port) const { + if (originalInputPrecisions.size() <= port) { + IE_THROW() << "Incorrect input port number for node " << getName(); + } + return originalInputPrecisions[port]; + } + InferenceEngine::Precision getOriginalOutputPrecisionAtPort(size_t port) const { + if (originalOutputPrecisions.size() <= port) { + IE_THROW() << "Incorrect output port number for node " << getName(); + } + return originalOutputPrecisions[port]; + } + + void setOriginalInputPrecisionAtPort(size_t port, InferenceEngine::Precision precision) { + if (originalInputPrecisions.size() <= port) { + IE_THROW() << "Incorrect input port number for node " << getName(); + } + originalInputPrecisions[port] = precision; + } + + void setOriginalOutputPrecisionAtPort(size_t port, InferenceEngine::Precision precision) { + if (originalOutputPrecisions.size() <= port) { + IE_THROW() << "Incorrect output port number for node " << getName(); + } + originalOutputPrecisions[port] = precision; + } + + void 
addOriginalInputPrecision(InferenceEngine::Precision precision) { + originalInputPrecisions.push_back(precision); + } + + void addOriginalOutputPrecision(InferenceEngine::Precision precision) { + originalOutputPrecisions.push_back(precision); + } + + size_t getOriginalInputsNumber() const { + return originalInputPrecisions.size(); + } + + size_t getOriginalOutputsNumber() const { + return originalOutputPrecisions.size(); + } + + Algorithm getAlgorithm() const { + return algorithm; + } + + void setAlgorithm(Algorithm alg) { + algorithm = alg; + } + + virtual bool canFuse(const MKLDNNNodePtr& node) const { + return false; + } + protected: - // TODO: It is necessary only in order to avoid modifications of cnnLayers and original topology - std::vector outDims; + bool canBePerformedAsScaleShift(const MKLDNNNode *parentNode = nullptr) const; + bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const; + void setType(Type type) { this->type = type; } @@ -559,6 +667,9 @@ protected: GetPrimitiveMemoryFormatFunc; std::vector internalBlobDesc; + std::vector inDims; + std::vector outDims; + std::vector fusedWith; std::vector mergedWith; std::vector implPriorities; @@ -567,7 +678,8 @@ protected: std::string originalLayers; // contains names of the original layers separated by comma - MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache); + MKLDNNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache); + MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache); int selectedPrimitiveDescriptorIndex = -1; bool permanent = false; @@ -589,6 +701,8 @@ protected: InferenceEngine::Blob::Ptr ext_scales; MKLDNNWeightsSharing::Ptr weightCache; + Algorithm algorithm = Algorithm::Undefined; + friend class MKLDNNEdge; friend class MKLDNNGraph; friend class MKLDNNGraphOptimizer; @@ -604,8 +718,6 @@ protected: virtual std::vector getAvailableFormatsForDims(const MKLDNNDims& dims) const; int batchToProcess(); - InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, bool weights, bool is_grouped = false); - InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped); /** @@ -620,15 +732,62 @@ protected: */ virtual std::vector getOutputPrecisions() const; + void addSupportedPrimDesc(const std::vector& inDataConfigurators, + const std::vector& outDataConfigurators, + impl_desc_type implType, + bool dynBatchSupport = false) { + auto fill_port = [] (const DataConfigurator& dataConfigurator, const InferenceEngine::SizeVector& dims, + InferenceEngine::Precision prc, std::vector& port) -> bool { + // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by tensorDescCreator. + // This should be suitable for major of scenarios since almost all nodes add `ncsp` tensorDescCreator which supports any shape rank. + if (dims.size() < dataConfigurator.tensorDescCreator->getMinimalRank()) + return false; + + InferenceEngine::DataConfig dataConfig; + dataConfig.inPlace = dataConfigurator.inplace; + dataConfig.constant = dataConfigurator.constant; + + dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(prc, dims); + + port.push_back(dataConfig); + + return true; + }; + + InferenceEngine::LayerConfig config; + for (size_t i = 0; i < inDataConfigurators.size(); i++) { + auto dims = inDataConfigurators[i].shape.empty() ? 
getParentEdgesAtPort(i)[0]->getDims().ToSizeVector() : inDataConfigurators[i].shape; + auto prc = inDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i) + : inDataConfigurators[i].prc; + if (!fill_port(inDataConfigurators[i], dims, prc, config.inConfs)) + return; + } + + for (size_t i = 0; i < outDataConfigurators.size(); i++) { + auto dims = outDataConfigurators[i].shape.empty() ? getChildEdgesAtPort(i)[0]->getDims().ToSizeVector() : outDataConfigurators[i].shape; + auto prc = outDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalOutputPrecisionAtPort(i) + : outDataConfigurators[i].prc; + if (!fill_port(outDataConfigurators[i], dims, prc, config.outConfs)) + return; + } + + config.dynBatchSupport = dynBatchSupport; + supportedPrimitiveDescriptors.push_back({config, implType}); + } + private: std::vector parentEdges; std::vector childEdges; - InferenceEngine::CNNLayerPtr cnnLayer; + std::vector originalInputPrecisions; + std::vector originalOutputPrecisions; + + int fusingPort; + mkldnn::engine engine; std::string name; - const std::string typeStr; + std::string typeStr; Type type; int execIndex = -1; @@ -660,21 +819,21 @@ private: }; class MKLDNNNode::NodesFactory : public openvino::cc::Factory& op, const mkldnn::engine &, MKLDNNWeightsSharing::Ptr &)> { public: NodesFactory() : Factory("NodesFactory") {} - MKLDNNNode* create(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, + MKLDNNNode* create(const std::shared_ptr& op, const mkldnn::engine& eng, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache); }; template struct MKLDNNNodeImpl : public MKLDNNNodeType { - MKLDNNNodeImpl(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNodeType(layer, eng, cache) { + MKLDNNNodeImpl(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNodeType(op, eng, cache) { MKLDNNNodeType::perfCounters().template buildClassCounters(NameFromType(MKLDNNNodeType::getType())); } }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 95302f9d442..6b1ccc16282 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -8,7 +8,6 @@ #include "mkldnn_weights_cache.hpp" #include "mkldnn_itt.h" -#include #include #include #include @@ -16,19 +15,8 @@ #include #include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include - #include #include @@ -62,6 +50,8 @@ #include #include #include +#include +#include #include #include #include @@ -86,8 +76,13 @@ #include #include +#include + +#include + #include "nodes/mkldnn_mvn_node.h" -#include "nodes/mkldnn_quantize_node.h" +#include "nodes/mkldnn_fake_quantize_node.h" +#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp" #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) # ifdef _WIN32 @@ -127,8 +122,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { } // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass - manager.register_pass(); - manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -141,6 +134,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { 
manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); std::vector> convert_precision_list{ {ngraph::element::i64, ngraph::element::i32}, @@ -155,6 +153,10 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { {ngraph::element::u4, ngraph::element::u8}, }; + // In case BF16 is not supported by the target CPU we explicitly convert it to FP32 + if (!with_cpu_x86_avx512_core()) + convert_precision_list.push_back({ngraph::element::bf16, ngraph::element::f32}); + for (auto &precision : convert_precision_list) { manager.register_pass(precision.first, precision.second); } @@ -171,12 +173,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { node->input_value(0).get_shape().size() == node->get_output_shape(0).size(); }); - // Disable FC reshaping for 3D case - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - return node->input_value(0).get_shape().size() == 3ul; - }); - pass_config->set_callback( [](const_node_ptr &node) -> bool { @@ -260,7 +256,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { pass_config->set_callback( [](const_node_ptr &node) -> bool { - return MKLDNNMVNNode::checkAxesSuitability(node); + std::string errorMessage; + return MKLDNNMVNNode::isSupportedOperation(node, errorMessage); }); pass_config->set_callback( @@ -279,7 +276,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { pass_config->disable(); pass_config->disable(); pass_config->disable(); - pass_config->disable(); pass_config->disable(); pass_config->disable(); @@ -325,57 +321,35 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { transformer.transform(nGraphFunc); } - bool has_fake_quantize = ::ngraph::op::util::has_op_with_type(nGraphFunc); + ngraph::pass::Manager postLPTPassManager; + postLPTPassManager.register_pass(); + postLPTPassManager.register_pass(); - ngraph::pass::Manager legacyManager; - - legacyManager.register_pass(); - legacyManager.register_pass(); - legacyManager.register_pass(ngraph::element::i64, ngraph::element::i32); - // not legacy actually, but it should be the last transformation in the transformation pipeline - legacyManager.register_pass(); - - auto legacyPassConfig = legacyManager.get_pass_config(); - legacyPassConfig->disable(); - - legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { - return !MKLDNNQuantizeNode::isNeedToDecompose(node); + postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { + std::string errMsg; + return MKLDNNFakeQuantizeNode::isSupportedOperation(node, errMsg); }); - - legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { + postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { if (auto mul_op = std::dynamic_pointer_cast(node)) { auto add_op = std::dynamic_pointer_cast(mul_op->get_input_node_shared_ptr(0)); auto constant = std::dynamic_pointer_cast(mul_op->get_input_node_shared_ptr(1)); bool is_dequantization = mul_op->get_rt_info().count("DEQUANTIZATION") != 0; if (add_op && constant && is_dequantization) { return ngraph::is_type(add_op->get_input_node_shared_ptr(0)) || - ngraph::is_type(add_op->get_input_node_shared_ptr(0)) || - ngraph::is_type(add_op->get_input_node_shared_ptr(0)); + ngraph::is_type(add_op->get_input_node_shared_ptr(0)) || + 
ngraph::is_type(add_op->get_input_node_shared_ptr(0)); } } return false; }); - - legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { + postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation return node->get_rt_info().count("UNROLL_TI") == 0; }); - legacyManager.run_passes(nGraphFunc); + postLPTPassManager.run_passes(nGraphFunc); - OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork"); - - clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize)); - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "ConvertIOPrecision"); - - // WA: after conversion to CNNNetwork user precision can redefine input/output precisions - // so we need to apply additional precision conversion but only for inputs and outputs - for (auto & precision : convert_precision_list) { - NetPass::ConvertIOPrecision(clonedNetwork, - InferenceEngine::details::convertPrecision(precision.first), - InferenceEngine::details::convertPrecision(precision.second)); - } + ConvertToCPUSpecificOpset(nGraphFunc); } InferenceEngine::ExecutableNetworkInternal::Ptr @@ -411,34 +385,9 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std conf.batchLimit = static_cast(network.getBatchSize()); } - CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(network); + CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network); - bool is_transformed = false; - if (clonedNetwork.getFunction()) { - Transformation(clonedNetwork, conf); - is_transformed = true; - } - IE_SUPPRESS_DEPRECATED_START - auto icnnnet = static_cast(clonedNetwork); - IE_SUPPRESS_DEPRECATED_END - auto implNetwork = std::dynamic_pointer_cast(icnnnet); - if (implNetwork) { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding"); - // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network - ConstTransformer transformator(implNetwork.get()); - transformator.fullTrim(); - if (!is_transformed) { - InferenceEngine::CNNNetwork implNetworkWrapper(implNetwork); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::I64, Precision::I32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::U64, Precision::I32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::U32, Precision::I32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::FP64, Precision::FP32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::FP16, Precision::FP32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::BOOL, Precision::U8); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::U16, Precision::I32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::I16, Precision::I32); - } - } + Transformation(clonedNetwork, conf); return std::make_shared(clonedNetwork, conf, extensionManager, weightsSharing); } @@ -540,6 +489,7 @@ void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) { QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map& config) const { QueryNetworkResult res; + MKLDNNWeightsSharing::Ptr fake_w_cache; auto function = network.getFunction(); if (function != nullptr) { @@ -556,21 +506,22 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma conf.batchLimit = static_cast(network.getBatchSize()); } - 
auto clonedNetwork = InferenceEngine::cloneNetwork(network); + auto clonedNetwork = InferenceEngine::details::cloneNetwork(network); + auto ops = clonedNetwork.getFunction()->get_ordered_ops(); Transformation(clonedNetwork, conf); std::unordered_set supported; std::unordered_set unsupported; - for (details::CNNNetworkIterator itLayer{clonedNetwork}; itLayer != details::CNNNetworkIterator(); itLayer++) { + for (auto op : ops) { auto layerIsSupported = [&] { std::unique_ptr ptr; try { - ptr.reset(MKLDNNNode::factory().create(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache)); + ptr.reset(MKLDNNNode::factory().create(op, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache)); } catch (InferenceEngine::Exception&) { - return false; + return false; } return true; } (); - for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) { + for (auto&& fusedLayerName : ngraph::getFusedNamesVector(op)) { if (InferenceEngine::details::contains(originalOps, fusedLayerName)) { if (layerIsSupported) { supported.emplace(fusedLayerName); @@ -614,17 +565,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma res.supportedLayersMap.emplace(layerName, GetName()); } } else { - details::CNNNetworkIterator i(network); - while (i != details::CNNNetworkIterator()) { - try { - mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0)); - // if we can create and have not thrown exception, then layer is supported - std::unique_ptr (MKLDNNNode::factory().create(*i, eng, extensionManager, fake_w_cache)); - res.supportedLayersMap.insert({ (*i)->name, GetName() }); - } catch (InferenceEngine::Exception&) { - } - i++; - } + IE_THROW() << "CPU plug-in doesn't support not ngraph-based model!"; } return res; diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_broadcast_to_tiles.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_broadcast_to_tiles.cpp new file mode 100644 index 00000000000..899a062ab04 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_broadcast_to_tiles.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "convert_broadcast_to_tiles.hpp" + +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertBroadcastToTiles, "ConvertBroadcastToTiles", 0); + +MKLDNNPlugin::ConvertBroadcastToTiles::ConvertBroadcastToTiles() { + auto broadcast = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { + auto broadcast = std::dynamic_pointer_cast(m.get_match_root()); + + if (!broadcast) { + return false; + } + + auto data_node = broadcast->input_value(0); + if (data_node.get_partial_shape().is_dynamic()) { + return false; + } + + auto shape_node = std::dynamic_pointer_cast(broadcast->input_value(1).get_node_shared_ptr()); + auto axes_node = std::dynamic_pointer_cast(broadcast->input_value(2).get_node_shared_ptr()); + if (!shape_node || !axes_node) return false; + + auto output_shape = shape_node->cast_vector(); + auto input_shape = data_node.get_shape(); + int64_t cur_dim_id = output_shape.size() - 1; + size_t dims_count = output_shape.size(); + + auto last_node = data_node; + + ngraph::NodeVector new_ops; + + // In case if input_shape and output_shape differ we insert Reshape to align shapes + if (input_shape.size() != dims_count) { + if (input_shape.size() > dims_count) { + return false; + } + ngraph::Shape shape; 
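+ // Illustrative case: a NUMPY broadcast of a {3, 4} input to a rank-4 output shape is first
+ // reshaped to {1, 1, 3, 4} here, so that per-axis repeat counts can be derived below.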
+ auto broadcast_type = broadcast->get_broadcast_spec(); + if (broadcast_type == ngraph::op::AutoBroadcastType::NUMPY) { + shape = input_shape; + for (size_t i = 0; i < (dims_count - input_shape.size()); ++i) { + shape.insert(shape.begin(), 1); + } + } else if (broadcast_type == ngraph::op::AutoBroadcastType::NONE) { + auto axes = axes_node->cast_vector(); + shape.assign(output_shape.size(), 1); + for (size_t i = 0; i < input_shape.size(); ++i) { + shape[axes[i]] = input_shape[i]; + } + } else { + return false; + } + auto shape_const = std::make_shared(ngraph::element::i64, ngraph::Shape{shape.size()}, shape); + auto reshape = std::make_shared(data_node, shape_const, true); + new_ops.push_back(reshape); + last_node = reshape; + input_shape = shape; + } + + std::vector dims(dims_count, 1); + auto input_shape_it = input_shape.rbegin(); + auto output_shape_it = output_shape.rbegin(); + while (output_shape_it != output_shape.rend() && input_shape_it != input_shape.rend()) { + int64_t in_dim = *input_shape_it, out_dim = *output_shape_it; + if (in_dim != out_dim) { + if (in_dim != 1) { + return false; + } + dims[cur_dim_id] = out_dim; + } + + --cur_dim_id; + ++output_shape_it; + ++input_shape_it; + } + + auto const_node = std::make_shared(ngraph::element::i64, ngraph::Shape{dims_count}, dims); + auto tile = register_new_node(last_node, const_node); + new_ops.push_back(tile); + tile->set_friendly_name(broadcast->get_friendly_name()); + + ngraph::copy_runtime_info(broadcast, new_ops); + ngraph::replace_node(broadcast, tile); + return true; + }; + + auto m = std::make_shared(broadcast, "ConvertBroadcastToTiles"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_broadcast_to_tiles.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_broadcast_to_tiles.hpp new file mode 100644 index 00000000000..a89ee785e67 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_broadcast_to_tiles.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class ConvertBroadcastToTiles: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertBroadcastToTiles(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.cpp new file mode 100644 index 00000000000..b28d4f5aec5 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.cpp @@ -0,0 +1,251 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "convert_matmul_to_fc_or_gemm.hpp" +#include "op/fully_connected.hpp" +#include +#include +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertMatMulToFC, "ConvertMatMulToFC", 0); + +MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() { + auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), + ngraph::pattern::any_input(ngraph::pattern::has_static_shape())}, + ngraph::pattern::has_static_shape()); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { + auto matmul = std::dynamic_pointer_cast(m.get_match_root()); + if (!matmul) { + return false; + } + + auto input_a = matmul->input(0).get_source_output(); + 
auto input_b = matmul->input(1).get_source_output(); + + auto shape_a = input_a.get_shape(); + auto shape_b = input_b.get_shape(); + auto output_shape = matmul->get_shape(); + + // Transformation to FC is not supported for 1D second input + if (shape_b.size() == 1) { + return false; + } + + /* + * get_aligned_shapes function align two input shapes to have the same size and + * the same batch dimensions (last two dimensions are not comparable). + * It also checks that dimensions are compatible so in case with two shapes + * for example: [2, 32, 64] [3, 64, 64] it will raise an exception. + */ + + auto get_aligned_shapes = [shape_a, shape_b, &matmul]() -> std::pair { + ngraph::Shape shape_a_aligned(shape_a), shape_b_aligned(shape_b); + size_t max_size = std::max(shape_a_aligned.size(), shape_b_aligned.size()); + for (size_t i = 0, cnt = max_size - shape_a_aligned.size(); i < cnt; ++i) + shape_a_aligned.insert(shape_a_aligned.begin(), 1); + for (size_t i = 0, cnt = max_size - shape_b_aligned.size(); i < cnt; ++i) + shape_b_aligned.insert(shape_b_aligned.begin(), 1); + + if (matmul->get_transpose_a() && shape_a.size() != 1) { + std::swap(*(shape_a_aligned.end() - 1), *(shape_a_aligned.end() - 2)); + } + if (matmul->get_transpose_b()) { + std::swap(*(shape_b_aligned.end() - 1), *(shape_b_aligned.end() - 2)); + } + + for (size_t i = 0; i < max_size - 2; ++i) { + if (shape_a_aligned[i] != shape_b_aligned[i] && shape_a_aligned[i] > 1 && shape_b_aligned[i] > 1) { + std::ostringstream stream; + stream << "Shapes can't be aligned: " << shape_a_aligned << " " << shape_b_aligned; + throw ngraph::ngraph_error(stream.str()); + } + size_t max_value = std::max(shape_a_aligned[i], shape_b_aligned[i]); + shape_a_aligned[i] = shape_b_aligned[i] = max_value; + } + + return {shape_a_aligned, shape_b_aligned}; + }; + + /* + * create_transpose function return Transpose operation to replace transpose_a or transpose_b + * arguments with an operation. In other words in this function we create Transpose operation + * with order length equal to output_shape length of given node and fill order with increasing + * sequence starting from 0 and replace last two dimension. For example for length = 4 the + * order will be [0, 1, 3, 2] that emulates transpose_a or transpose_b attribute. + */ + + auto create_transpose = [this](ngraph::Output node, const std::string& transpose_name) -> std::shared_ptr { + ngraph::Shape output_shape = node.get_node_shared_ptr()->get_shape(); + + std::vector transpose_order(output_shape.size()); + std::iota(transpose_order.begin(), transpose_order.end(), 0); + std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2)); + + auto transpose = ngraph::pass::MatcherPass::register_new_node( + node, ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{transpose_order.size()}, transpose_order)); + transpose->set_friendly_name(transpose_name); + return transpose; + }; + + // fc_input_a and fc_input_b - are the final inputs that will be set to FullyConnected of GemmIE operations. + // So in case of adding new operations that takes matmul inputs we need keep update fc_input_a and + // fc_input_b updated. + auto fc_input_a = input_a, fc_input_b = input_b; + + // vector of new nGraph operations + ngraph::NodeVector new_ops; + + // Check that if second inputs is Constant operation and it's shape without ones dimensions has length <= 2 + // we replace MatMul with FullyConnected operation. + // Otherwise we replace MatMul with Gemm. 
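+        // e.g. MatMul({B, I, K} x constant {K, O}) becomes FullyConnected with the weights
+        // transposed/reshaped to a 2D {O, K} tensor; all other cases return false here and are
+        // left for the ConvertMatMulToGemm pass defined later in this file.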
+ if ((std::dynamic_pointer_cast(fc_input_b.get_node_shared_ptr()) || + std::dynamic_pointer_cast(fc_input_b.get_node_shared_ptr())) && + std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2) { + ngraph::Shape shape_a_aligned, shape_b_aligned; + std::tie(shape_a_aligned, shape_b_aligned) = get_aligned_shapes(); + + if (shape_a_aligned.size() < 2 || shape_b_aligned.size() < 2) { + throw ngraph::ngraph_error("MatMul " + matmul->get_friendly_name() + " shapes are inconsistent."); + } + + // Transferring from MatMul representation: [B, I, K] * [B, K, O] = [B, I, O] + // to FullyConnected representation: [I, K] * [K, O] = [I, O] + size_t K = *(shape_a_aligned.end() - 1); + ngraph::Shape B(shape_a_aligned.begin(), shape_a_aligned.end() - 2); + + // Weights normalization + if (!matmul->get_transpose_b()) { + fc_input_b = create_transpose(fc_input_b, matmul->get_friendly_name() + "/transpose_b"); + new_ops.push_back(fc_input_b.get_node_shared_ptr()); + } + + if (shape_b.size() != 2) { + auto reshape_shape = + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {-1ll, static_cast(K)}); + fc_input_b = std::make_shared(fc_input_b, reshape_shape, true); + new_ops.push_back(fc_input_b.get_node_shared_ptr()); + } + + // Input normalization + if (matmul->get_transpose_a() && shape_a.size() != 1) { + fc_input_a = create_transpose(fc_input_a, matmul->get_friendly_name() + "/transpose_a"); + new_ops.push_back(fc_input_a.get_node_shared_ptr()); + } + + // Create FullyConnected + auto fc = std::make_shared(fc_input_a, fc_input_b, output_shape, matmul->output(0).get_element_type()); + fc->set_friendly_name(matmul->get_friendly_name()); + new_ops.push_back(fc); + + ngraph::copy_runtime_info(matmul, new_ops); + ngraph::replace_node(matmul, fc); + return true; + } + return false; + }; + + auto m = std::make_shared(matmul, "ConvertMatMulToFC"); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertMatMulToGemm, "ConvertMatMulToGemm", 0); + +MKLDNNPlugin::ConvertMatMulToGemm::ConvertMatMulToGemm() { + auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), + ngraph::pattern::any_input(ngraph::pattern::has_static_shape())}, + ngraph::pattern::has_static_shape()); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { + auto matmul = std::dynamic_pointer_cast(m.get_match_root()); + if (!matmul) { + return false; + } + + auto input_a = matmul->input(0).get_source_output(); + auto input_b = matmul->input(1).get_source_output(); + + auto shape_a = input_a.get_shape(); + auto shape_b = input_b.get_shape(); + auto output_shape = matmul->get_shape(); + + auto fc_input_a = input_a, fc_input_b = input_b; + ngraph::NodeVector new_ops; + + if (shape_a.size() == 1) { + // If the first input is 1D tensor, it is unsqueezed to 2D tensor (row vector) + // by adding axes with size 1 at ROW_INDEX_DIM, to the left of the shape. + // For example {S} will be reshaped to {1, S}. 
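+            // The axis constant {0} passed below selects ROW_INDEX_DIM, i.e. the new leading dimension.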
+ fc_input_a = std::make_shared(fc_input_a, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0})); + shape_a = fc_input_a.get_shape(); + new_ops.push_back(fc_input_a.get_node_shared_ptr()); + // For 1D inputs transpose flag is expected to always act like `false` + matmul->set_transpose_a(false); + } + if (shape_b.size() == 1) { + // If the second input is 1D tensor, it is unsqueezed to 2D tensor (column vector) + // by adding axes with size 1 at COL_INDEX_DIM, to the right of the shape. + // For example {S} will be reshaped to {S, 1}. + fc_input_b = std::make_shared(fc_input_b, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1})); + shape_b = fc_input_b.get_shape(); + new_ops.push_back(fc_input_b.get_node_shared_ptr()); + // For 1D inputs transpose flag is expected to always act like `false` + matmul->set_transpose_b(false); + } + + // WA for IE that Gemm must have inputs with the same length. + // If ranks of input arguments are still different, + // the smaller tensor is unsqueezed from the left side of the shape + // by necessary number of axes to make both shapes of the same rank. + if (shape_a.size() < shape_b.size()) { + // Reshape first input (fc_input_a) + ngraph::Shape reshape_shape(shape_b.size() - shape_a.size(), 1); + reshape_shape.insert(reshape_shape.end(), shape_a.begin(), shape_a.end()); + fc_input_a = ngraph::op::util::reshapeTo(fc_input_a, reshape_shape); + new_ops.push_back(fc_input_a.get_node_shared_ptr()); + } else if (shape_b.size() < shape_a.size()) { + // Reshape second input (fc_input_b) + ngraph::Shape reshape_shape(shape_a.size() - shape_b.size(), 1); + reshape_shape.insert(reshape_shape.end(), shape_b.begin(), shape_b.end()); + fc_input_b = ngraph::op::util::reshapeTo(fc_input_b, reshape_shape); + new_ops.push_back(fc_input_b.get_node_shared_ptr()); + } + + auto gemm = matmul->copy_with_new_inputs({ fc_input_a, fc_input_b }); + new_ops.push_back(gemm); + + if (gemm->get_shape() != output_shape) { + // This case is possible when one of the inputs has exactly 1 dimension (that is not supported by GEMM operation) + // So to preserve output shape we insert additional reshape operation + std::shared_ptr reshape_output; + if (output_shape.size() == 0) { + std::vector dim_indices(gemm->get_shape().size()); + std::iota(dim_indices.begin(), dim_indices.end(), 0); + reshape_output = std::make_shared(gemm, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{dim_indices.size()}, dim_indices)); + } else { + reshape_output = ngraph::op::util::reshapeTo(gemm, output_shape); + } + + new_ops.push_back(reshape_output); + gemm->set_friendly_name(matmul->get_friendly_name() + "/gemm"); + reshape_output->set_friendly_name(matmul->get_friendly_name()); + ngraph::copy_runtime_info(matmul, new_ops); + ngraph::replace_node(matmul, reshape_output); + } else { + gemm->set_friendly_name(matmul->get_friendly_name()); + ngraph::copy_runtime_info(matmul, new_ops); + ngraph::replace_node(matmul, gemm); + } + + return true; + }; + + auto m = std::make_shared(matmul, "ConvertMatMulToGemm"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.hpp new file mode 100644 index 00000000000..6f223eb8df9 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc_or_gemm.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 
2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class ConvertMatMulToFC: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatMulToFC(); +}; + +class ConvertMatMulToGemm: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatMulToGemm(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_tile_to_seq_tiles.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_tile_to_seq_tiles.cpp new file mode 100644 index 00000000000..06b91614e1b --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_tile_to_seq_tiles.cpp @@ -0,0 +1,95 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "convert_tile_to_seq_tiles.hpp" + +#include +#include + +#include +#include + +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertTileToSeqTiles, "ConvertTileToSeqTiles", 0); + +MKLDNNPlugin::ConvertTileToSeqTiles::ConvertTileToSeqTiles() { + auto tile = ngraph::pattern::wrap_type({ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), + ngraph::pattern::wrap_type()}); + + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher& m) { + auto tile = std::dynamic_pointer_cast(m.get_match_root()); + if (!tile) { + return false; + } + + auto tiles_node = std::dynamic_pointer_cast(tile->input_value(1).get_node_shared_ptr()); + if (!tiles_node) return false; + + auto tiles = tiles_node->cast_vector(); + auto input_shape_rank = static_cast(tile->get_input_partial_shape(0).rank().get_length()); + int64_t cur_dim_id = tiles.size() - 1; + + if (static_cast(tiles.size()) != input_shape_rank) return false; + + auto last_node = tile->input_value(0); + auto friendly_name = tile->get_friendly_name(); + + int num_of_tile_dims = 0; + for (auto t : tiles) { + if (t != 1) { + num_of_tile_dims++; + } + } + + if (num_of_tile_dims == 0) { + auto outputs = tile->get_output_target_inputs(0); + for (const auto &out : outputs) { + if (std::dynamic_pointer_cast(out.get_node()->shared_from_this())) { + return false; + } + } + ngraph::replace_node(tile, {last_node}); + return true; + } + + // Will generate sequence of Tile operations if num_of_tile_dims != 1 + // because IE Tile operations supports only one axis to be tiled. + // To keep op name unique will use special IE specific delimiter ':' + // Original frameworks doesn't use such delimiter in names, so it will + // guarantee that newly generated name like "original_name:_1" doesn't + // match with already existed names. 
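+        // e.g. repeats {2, 1, 3} on a rank-3 input produce two chained Tile ops, one repeating
+        // along axis 2 and one along axis 0, since each generated Tile repeats a single axis.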
+ if (num_of_tile_dims > 1) { + friendly_name += ":"; + } + + ngraph::NodeVector new_ops; + + auto tiles_it = tiles.rbegin(); + while (tiles_it != tiles.rend()) { + int64_t tile_dim = *tiles_it; + if (tile_dim != 1) { + std::vector dims(input_shape_rank, 1); + dims[cur_dim_id] = tile_dim; + auto const_node = std::make_shared(ngraph::element::i64, ngraph::Shape{input_shape_rank}, dims); + auto new_tile = std::make_shared(last_node, const_node); + new_tile->set_friendly_name(friendly_name); + friendly_name += "_" + std::to_string(cur_dim_id); + new_ops.push_back(new_tile); + + last_node = new_tile; + } + --cur_dim_id; + ++tiles_it; + } + + last_node.get_node_shared_ptr()->set_friendly_name(tile->get_friendly_name()); + ngraph::copy_runtime_info(tile, new_ops); + ngraph::replace_node(tile, {last_node}); + return true; + }; + + auto m = std::make_shared(tile, "ConvertTileToSeqTiles"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_tile_to_seq_tiles.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_tile_to_seq_tiles.hpp new file mode 100644 index 00000000000..00a5c7ccd26 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_tile_to_seq_tiles.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class ConvertTileToSeqTiles: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertTileToSeqTiles(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp new file mode 100644 index 00000000000..8496558e614 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp @@ -0,0 +1,49 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "convert_matmul_to_fc_or_gemm.hpp" +#include "fc_bias_fusion.hpp" +#include "reshape_fc_fusion.hpp" +#include "reshape_fully_connected.hpp" +#include "convert_broadcast_to_tiles.hpp" +#include "convert_tile_to_seq_tiles.hpp" +#include "reshape_1d_ops.hpp" +#include "convert_to_power_static.hpp" +#include "convert_to_leaky_relu.hpp" +#include "convert_to_swish_cpu.hpp" +#include "reshape_prelu.hpp" +#include "rnn_sequences_optimization.hpp" + +namespace MKLDNNPlugin { + +inline void ConvertToCPUSpecificOpset(std::shared_ptr &nGraphFunc) { + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + if (!ngraph::op::util::has_op_with_type(nGraphFunc)) { + manager.register_pass(); + } + manager.register_pass(); + manager.register_pass(ngraph::element::i64, ngraph::element::i32); + manager.run_passes(nGraphFunc); +} + +} // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_leaky_relu.cpp 
b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_leaky_relu.cpp new file mode 100644 index 00000000000..73d469c652c --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_leaky_relu.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "convert_to_leaky_relu.hpp" + +#include +#include +#include +#include "op/leaky_relu.hpp" + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertToLeakyRelu, "ConvertToLeakyRelu", 0); + +MKLDNNPlugin::ConvertToLeakyRelu::ConvertToLeakyRelu() { + auto prelu = ngraph::pattern::wrap_type({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), + ngraph::pattern::any_input(ngraph::pattern::has_static_shape())}); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { + auto prelu = std::dynamic_pointer_cast(m.get_match_root()); + if (!prelu) { + return false; + } + auto slopeNode = std::dynamic_pointer_cast(prelu->get_input_node_shared_ptr(1)); + if (slopeNode != nullptr && ngraph::shape_size(prelu->get_input_shape(1)) == 1) { + const float slope = slopeNode->cast_vector()[0]; + const auto leakyRelu = std::make_shared(prelu->input(0).get_source_output(), slope, + prelu->output(0).get_element_type()); + leakyRelu->set_friendly_name(prelu->get_friendly_name()); + ngraph::copy_runtime_info(prelu, leakyRelu); + ngraph::replace_node(prelu, leakyRelu); + return true; + } + return false; + }; + + auto m = std::make_shared(prelu, "ConvertToLeakyRelu"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_leaky_relu.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_leaky_relu.hpp new file mode 100644 index 00000000000..6e5eff2937c --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_leaky_relu.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class ConvertToLeakyRelu: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertToLeakyRelu(); +}; + +} // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_power_static.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_power_static.cpp new file mode 100644 index 00000000000..48a00ec98ae --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_power_static.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "convert_to_power_static.hpp" + +#include +#include +#include +#include +#include +#include +#include "op/power_static.hpp" +#include "op/fully_connected.hpp" +#include "utils/general_utils.h" + +int getConstPort(const std::shared_ptr &node) { + const auto const1 = std::dynamic_pointer_cast(node->get_input_node_shared_ptr(0)); + const auto const2 = std::dynamic_pointer_cast(node->get_input_node_shared_ptr(1)); + int constPort = -1; + if (const2) { + constPort = 1; + } else if (const1) { + constPort = 0; + } + return constPort; +} + +template +bool isConvertableToPowerStatic(const std::shared_ptr &node) { + const int constPort = getConstPort(node); + if ((!node->get_input_element_type(0).is_real() && !node->get_input_element_type(1).is_real()) || !node->get_output_element_type(0).is_real() || + constPort == -1) { + 
return false; + } + + const int nonConstPort = 1 - constPort; + const auto constNode = std::dynamic_pointer_cast(node->get_input_node_shared_ptr(constPort)); + + return ngraph::shape_size(node->get_input_shape(constPort)) == 1 && + node->get_input_shape(nonConstPort).size() >= node->get_input_shape(constPort).size() && + !MKLDNNPlugin::one_of(node->get_input_node_shared_ptr(nonConstPort)->get_type_info(), ngraph::opset1::NormalizeL2::type_info, + ngraph::opset4::Interpolate::type_info, + ngraph::opset1::Convolution::type_info, + ngraph::opset1::GroupConvolution::type_info, + ngraph::opset1::ConvolutionBackpropData::type_info, + ngraph::opset1::GroupConvolutionBackpropData::type_info, + MKLDNNPlugin::FullyConnectedNode::type_info, + ngraph::op::v0::MVN::type_info, + ngraph::opset6::MVN::type_info); +} + +template <> +bool isConvertableToPowerStatic(const std::shared_ptr &node) { + return std::dynamic_pointer_cast(node->get_input_node_shared_ptr(1)) != nullptr && + node->get_input_shape(0).size() >= node->get_input_shape(1).size() && ngraph::shape_size(node->get_input_shape(1)) == 1; +} + +template +std::shared_ptr convert(const std::shared_ptr &node) { + const int constPort = getConstPort(node); + const int nonConstPort = 1 - constPort; + std::shared_ptr powerNode = std::dynamic_pointer_cast(node->get_input_node_shared_ptr(constPort)); + const float value = powerNode->cast_vector()[0]; + if (std::is_same::value) { + return std::make_shared(node->input(nonConstPort).get_source_output(), value, 1.0f, 0.0f, + node->output(0).get_element_type()); + } else if (std::is_same::value) { + return std::make_shared(node->input(nonConstPort).get_source_output(), 1.0f, 1.0f, value, + node->output(0).get_element_type()); + } else if (std::is_same::value) { + float scale = 1.0f; + float shift = value; + if (constPort == 0) { + scale *= -1.0f; + } else { + shift *= -1.0f; + } + return std::make_shared(node->input(nonConstPort).get_source_output(), 1.0f, scale, shift, + node->output(0).get_element_type()); + } else if (std::is_same::value) { + return std::make_shared(node->input(nonConstPort).get_source_output(), 1.f, value, 0.0f, + node->output(0).get_element_type()); + } else { + throw ngraph::ngraph_error("ConvertToPowerStatic: op type is not supported"); + } +} + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertToPowerStatic, "ConvertToPowerStatic", 0); + +MKLDNNPlugin::ConvertToPowerStatic::ConvertToPowerStatic() { + ngraph::OutputVector twoInputs = {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), + ngraph::pattern::any_input(ngraph::pattern::has_static_shape())}; + auto power = ngraph::pattern::wrap_type(twoInputs); + auto add = ngraph::pattern::wrap_type(twoInputs); + auto sub = ngraph::pattern::wrap_type(twoInputs); + auto mult = ngraph::pattern::wrap_type(twoInputs); + const auto candidate = std::make_shared(ngraph::OutputVector{power, add, sub, mult}); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) { + auto node = m.get_match_root(); + + std::shared_ptr toReplace = node; + if (auto power = std::dynamic_pointer_cast(node)) { + if (!isConvertableToPowerStatic(power)) + return false; + toReplace = convert(power); + } else if (auto add = std::dynamic_pointer_cast(node)) { + if (!isConvertableToPowerStatic(add)) + return false; + toReplace = convert(add); + } else if (auto sub = std::dynamic_pointer_cast(node)) { + if (!isConvertableToPowerStatic(sub)) + return false; + toReplace = convert(sub); + } else if (auto mult = std::dynamic_pointer_cast(node)) { + 
if (!isConvertableToPowerStatic(mult)) + return false; + toReplace = convert(mult); + } else { + throw ngraph::ngraph_error("ConvertToPowerStatic: op type is not supported"); + } + toReplace->set_friendly_name(node->get_friendly_name()); + ngraph::copy_runtime_info(node, toReplace); + ngraph::replace_node(node, toReplace); + return true; + }; + + auto m = std::make_shared(candidate, "ConvertToPowerStatic"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_power_static.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_power_static.hpp new file mode 100644 index 00000000000..9fefa3a9ba5 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_power_static.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class ConvertToPowerStatic: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertToPowerStatic(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_swish_cpu.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_swish_cpu.cpp new file mode 100644 index 00000000000..78f1ff516e5 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_swish_cpu.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "convert_to_swish_cpu.hpp" + +#include +#include +#include +#include "op/swish_cpu.hpp" + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ConvertToSwishCPU, "ConvertToSwishCPU", 0); + +MKLDNNPlugin::ConvertToSwishCPU::ConvertToSwishCPU() { + auto swish = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher& m) { + auto swish = std::dynamic_pointer_cast (m.get_match_root()); + if (!swish) { + return false; + } + float beta_value = 1.0; + if (swish->input_values().size() == 2) { + auto beta = std::dynamic_pointer_cast(swish->get_input_node_shared_ptr(1)); + + if (!beta || ngraph::shape_size(swish->get_input_shape(1)) != 1) { + return false; + } + beta_value = beta->cast_vector()[0]; + } + + auto swish_cpu = std::make_shared(swish->input(0).get_source_output(), beta_value); + swish_cpu->set_friendly_name(swish->get_friendly_name()); + ngraph::copy_runtime_info(swish, swish_cpu); + ngraph::replace_node(swish, swish_cpu); + return true; + }; + + auto m = std::make_shared(swish, "ConvertToSwishCPU"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_swish_cpu.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_swish_cpu.hpp new file mode 100644 index 00000000000..98da1e18c6a --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_swish_cpu.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class ConvertToSwishCPU: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertToSwishCPU(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/fc_bias_fusion.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/fc_bias_fusion.cpp new file mode 100644 index 00000000000..5ad5b180b52 --- /dev/null 
+++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/fc_bias_fusion.cpp @@ -0,0 +1,70 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fc_bias_fusion.hpp" +#include "op/fully_connected.hpp" +#include +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::FullyConnectedBiasFusion, "FullyConnectedBiasFusion", 0); + +MKLDNNPlugin::FullyConnectedBiasFusion::FullyConnectedBiasFusion() { + auto m_fc = ngraph::pattern::wrap_type([](ngraph::Output output) { + return ngraph::pattern::consumers_count(1)(output) && ngraph::pattern::has_static_shape()(output); + }); + auto m_bias = ngraph::pattern::any_input(); + auto m_add = ngraph::pattern::wrap_type({m_fc, m_bias}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + auto & pattern_to_output = m.get_pattern_value_map(); + + auto add = pattern_to_output[m_add].get_node_shared_ptr(); + auto bias = pattern_to_output[m_bias].get_node_shared_ptr(); + auto fc = std::dynamic_pointer_cast(pattern_to_output[m_fc].get_node_shared_ptr()); + if (!fc) { + return false; + } + + if (auto bcast = std::dynamic_pointer_cast(bias)) { + bias = bcast->input_value(0).get_node_shared_ptr(); + } + + if (!std::dynamic_pointer_cast(bias)) { + return false; + } + + ngraph::Shape bias_shape(bias->get_shape()); + ngraph::Shape output_shape(fc->get_shape()); + size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies()); + if (bias_shape.empty() || bias_shape.back() != output_shape.back() || bias_shape.back() != bias_size) { + return false; + } + + ngraph::NodeVector new_ops; + + std::shared_ptr final_bias = bias; + if (bias->get_shape().size() >= 2) { + final_bias = std::make_shared(final_bias, ngraph::opset1::Constant::create(ngraph::element::i64, + ngraph::Shape{1}, {-1}), true); + new_ops.push_back(final_bias); + } + + auto new_fc = std::make_shared(fc->input(0).get_source_output(), + fc->input(1).get_source_output(), + final_bias, + fc->get_shape(), + fc->get_output_type()); + new_ops.push_back(new_fc); + + new_fc->set_friendly_name(add->get_friendly_name()); + ngraph::copy_runtime_info({fc, add}, new_ops); + ngraph::replace_node(add, new_fc); + return true; + }; + + auto m = std::make_shared(m_add, "FullyConnectedBiasFusion"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/fc_bias_fusion.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/fc_bias_fusion.hpp new file mode 100644 index 00000000000..5bb154651af --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/fc_bias_fusion.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class FullyConnectedBiasFusion : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + FullyConnectedBiasFusion(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/fully_connected.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/fully_connected.cpp new file mode 100644 index 00000000000..cf41d41efd4 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/fully_connected.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fully_connected.hpp" + +constexpr ngraph::NodeTypeInfo 
MKLDNNPlugin::FullyConnectedNode::type_info; + +MKLDNNPlugin::FullyConnectedNode::FullyConnectedNode(const ngraph::Output& A, + const ngraph::Output& B, + const ngraph::Shape& output_shape, + const ngraph::element::Type output_type) + : Op({A, B}), m_output_shape(output_shape), m_output_type(output_type) { + constructor_validate_and_infer_types(); +} + +MKLDNNPlugin::FullyConnectedNode::FullyConnectedNode(const ngraph::Output& A, + const ngraph::Output& B, + const ngraph::Output& C, + const ngraph::Shape& output_shape, + const ngraph::element::Type output_type) + : Op({A, B, C}), m_output_shape(output_shape), m_output_type(output_type) { + constructor_validate_and_infer_types(); +} + +std::shared_ptr MKLDNNPlugin::FullyConnectedNode::clone_with_new_inputs(const ngraph::OutputVector& new_args) const { + check_new_args_count(this, new_args); + if (new_args.size() == 2) { + return std::make_shared(new_args.at(0), new_args.at(1), m_output_shape); + } else if (new_args.size() == 3) { + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), m_output_shape); + } + + throw ngraph::ngraph_error("Unsupported number of arguments for FullyConnected operation"); +} + +void MKLDNNPlugin::FullyConnectedNode::validate_and_infer_types() { + m_output_size = m_output_shape.back(); + set_output_type(0, m_output_type == ngraph::element::undefined ? input_value(0).get_element_type() : m_output_type, m_output_shape); +} + +bool MKLDNNPlugin::FullyConnectedNode::visit_attributes(ngraph::AttributeVisitor &visitor) { + visitor.on_attribute("out-size", m_output_size); + return true; +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/fully_connected.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/fully_connected.hpp new file mode 100644 index 00000000000..c492897d747 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/fully_connected.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace MKLDNNPlugin { + +class FullyConnectedNode : public ngraph::op::Op { +public: + static constexpr ngraph::NodeTypeInfo type_info{"FullyConnected", 0}; + const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; } + + FullyConnectedNode() = default; + + FullyConnectedNode(const ngraph::Output &A, + const ngraph::Output &B, + const ngraph::Shape &output_shape, + const ngraph::element::Type output_type = ngraph::element::undefined); + + FullyConnectedNode(const ngraph::Output &A, + const ngraph::Output &B, + const ngraph::Output &C, + const ngraph::Shape &output_shape, + const ngraph::element::Type output_type = ngraph::element::undefined); + + bool visit_attributes(ngraph::AttributeVisitor &visitor) override; + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const ngraph::OutputVector& new_args) const override; + + size_t get_out_size() const { return m_output_size; } + + ngraph::element::Type get_output_type() const { return m_output_type; } + +private: + size_t m_output_size = 0; + ngraph::Shape m_output_shape = {}; + ngraph::element::Type m_output_type; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/leaky_relu.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/leaky_relu.cpp new file mode 100644 index 00000000000..4e943d4b517 --- /dev/null +++ 
b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/leaky_relu.cpp @@ -0,0 +1,31 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "leaky_relu.hpp" + +constexpr ngraph::NodeTypeInfo MKLDNNPlugin::LeakyReluNode::type_info; + +MKLDNNPlugin::LeakyReluNode::LeakyReluNode(const ngraph::Output &data, + const float &negative_slope, + const ngraph::element::Type output_type) + : Op({data}), m_negative_slope(negative_slope), m_output_type(output_type) { + constructor_validate_and_infer_types(); +} + +std::shared_ptr MKLDNNPlugin::LeakyReluNode::clone_with_new_inputs(const ngraph::OutputVector& new_args) const { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), m_negative_slope, m_output_type); +} + +void MKLDNNPlugin::LeakyReluNode::validate_and_infer_types() { + set_output_type( + 0, + m_output_type == ngraph::element::undefined ? get_input_element_type(0) : m_output_type, + get_input_partial_shape(0)); +} + +bool MKLDNNPlugin::LeakyReluNode::visit_attributes(ngraph::AttributeVisitor &visitor) { + visitor.on_attribute("negative_slope", m_negative_slope); + return true; +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/leaky_relu.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/leaky_relu.hpp new file mode 100644 index 00000000000..3465ffc7510 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/leaky_relu.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class LeakyReluNode : public ngraph::op::Op { +public: + static constexpr ngraph::NodeTypeInfo type_info{"LeakyRelu", 0}; + const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; } + + LeakyReluNode(const ngraph::Output &data, const float &negative_slope, const ngraph::element::Type output_type); + + void validate_and_infer_types() override; + + bool visit_attributes(ngraph::AttributeVisitor &visitor) override; + + std::shared_ptr clone_with_new_inputs(const ngraph::OutputVector &new_args) const override; + + float get_slope() { return m_negative_slope; } + + ngraph::element::Type get_output_type() const { return m_output_type; } + +private: + float m_negative_slope; + ngraph::element::Type m_output_type; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/power_static.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/power_static.cpp new file mode 100644 index 00000000000..be1f23f9bb3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/power_static.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "power_static.hpp" + +constexpr ngraph::NodeTypeInfo MKLDNNPlugin::PowerStaticNode::type_info; + +MKLDNNPlugin::PowerStaticNode::PowerStaticNode(const ngraph::Output &data, + const float &power, + const float &scale, + const float &shift, + const ngraph::element::Type output_type) + : Op({data}), scale(scale), power(power), shift(shift), m_output_type(output_type) { + constructor_validate_and_infer_types(); +} + +std::shared_ptr MKLDNNPlugin::PowerStaticNode::clone_with_new_inputs(const ngraph::OutputVector &new_args) const { + if (new_args.size() != 1) { + throw ngraph::ngraph_error("Incorrect number of new arguments"); + } + + return std::make_shared(new_args.at(0), this->power, 
this->scale, this->shift, this->m_output_type); +} + +void MKLDNNPlugin::PowerStaticNode::validate_and_infer_types() { + set_output_type(0, m_output_type == ngraph::element::undefined ? get_input_element_type(0) : m_output_type, get_input_partial_shape(0)); +} + +bool MKLDNNPlugin::PowerStaticNode::visit_attributes(ngraph::AttributeVisitor &visitor) { + visitor.on_attribute("scale", scale); + visitor.on_attribute("power", power); + visitor.on_attribute("shift", shift); + return true; +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/power_static.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/power_static.hpp new file mode 100644 index 00000000000..e43a54c4e03 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/power_static.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class PowerStaticNode : public ngraph::op::Op { +public: + static constexpr ngraph::NodeTypeInfo type_info{"PowerStatic", 0}; + const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; } + + PowerStaticNode(const ngraph::Output &data, const float &power, const float &scale, const float &shift, + const ngraph::element::Type output_type = ngraph::element::undefined); + + void validate_and_infer_types() override; + + bool visit_attributes(ngraph::AttributeVisitor &visitor) override; + + std::shared_ptr clone_with_new_inputs(const ngraph::OutputVector &new_args) const override; + + float get_power() const { return power; } + float get_scale() const { return scale; } + float get_shift() const { return shift; } + +private: + float scale, power, shift; + ngraph::element::Type m_output_type; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/swish_cpu.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/swish_cpu.cpp new file mode 100644 index 00000000000..b940eb2bca6 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/swish_cpu.cpp @@ -0,0 +1,31 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "swish_cpu.hpp" + +constexpr ngraph::NodeTypeInfo MKLDNNPlugin::SwishNode::type_info; + +MKLDNNPlugin::SwishNode::SwishNode(const ngraph::Output & input, const float alpha) + : Op({input}), m_alpha(alpha) { + constructor_validate_and_infer_types(); +} + +std::shared_ptr MKLDNNPlugin::SwishNode::clone_with_new_inputs(const ngraph::OutputVector& new_args) const { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), m_alpha); +} + +bool MKLDNNPlugin::SwishNode::visit_attributes(ngraph::AttributeVisitor& visitor) { + visitor.on_attribute("alpha", m_alpha); + return true; +} + +void MKLDNNPlugin::SwishNode::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); +} + +float MKLDNNPlugin::SwishNode::get_alpha() const { + return m_alpha; +} + diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/swish_cpu.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/swish_cpu.hpp new file mode 100644 index 00000000000..3ff1c30a1ad --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/op/swish_cpu.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + 
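+// CPU-specific Swish op: the scalar beta multiplier of opset4::Swish is stored as the "alpha"
+// attribute (folded in by ConvertToSwishCPU) instead of being passed as a second constant input.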
+class SwishNode : public ngraph::op::Op { +public: + static constexpr ngraph::NodeTypeInfo type_info{"SwishCPU", 0}; + const ngraph::NodeTypeInfo &get_type_info() const override { return type_info; } + + explicit SwishNode(const ngraph::Output &input, float alpha = 1.0); + + void validate_and_infer_types() override; + bool visit_attributes(ngraph::AttributeVisitor& visitor) override; + std::shared_ptr clone_with_new_inputs(const ngraph::OutputVector &new_args) const override; + + float get_alpha() const; +protected: + float m_alpha; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_1d_ops.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_1d_ops.cpp new file mode 100644 index 00000000000..11e9919a761 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_1d_ops.cpp @@ -0,0 +1,175 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reshape_1d_ops.hpp" + +#include +#include + +#include +#include +#include +#include + +#include "transformations/utils/utils.hpp" + +template +std::shared_ptr convert(const ngraph::Output & data, std::shared_ptr node, ngraph::NodeVector &new_ops) { + auto new_strides = node->get_strides(); + auto new_dilations = node->get_dilations(); + auto new_pads_begin = node->get_pads_begin(); + auto new_pad_end = node->get_pads_end(); + + new_strides.insert(new_strides.begin(), 1); + new_dilations.insert(new_dilations.begin(), 1); + new_pads_begin.insert(new_pads_begin.begin(), 0); + new_pad_end.insert(new_pad_end.begin(), 0); + + ngraph::Shape new_weights_shape(node->input_value(1).get_shape()); + new_weights_shape.insert(new_weights_shape.begin() + new_weights_shape.size() - 1, 1); + auto weights = ngraph::op::util::reshapeTo(node->input_value(1), new_weights_shape); + + new_ops.push_back(weights); + + if (std::dynamic_pointer_cast(node)) { + return std::make_shared>(std::vector{ngraph::element::f32, ngraph::element::f32}, + std::vector{ngraph::element::f32}, + ngraph::op::TemporaryReplaceOutputType(data, ngraph::element::f32).get(), + ngraph::op::TemporaryReplaceOutputType(weights, ngraph::element::f32).get(), + new_strides, + new_pads_begin, + new_pad_end, + new_dilations, + node->get_auto_pad()); + } else { + return std::make_shared(data, + weights, + new_strides, + new_pads_begin, + new_pad_end, + new_dilations, + node->get_auto_pad()); + } +} + +template <> +std::shared_ptr convert(const ngraph::Output & data, std::shared_ptr node, ngraph::NodeVector & new_ops) { + auto new_strides = node->get_strides(); + auto new_pads_begin = node->get_pads_begin(); + auto new_pad_end = node->get_pads_end(); + auto new_kernel = node->get_kernel(); + + new_strides.insert(new_strides.begin(), 1); + new_pads_begin.insert(new_pads_begin.begin(), 0); + new_pad_end.insert(new_pad_end.begin(), 0); + new_kernel.insert(new_kernel.begin(), 1); + + return std::make_shared(data, + new_strides, + new_pads_begin, + new_pad_end, + new_kernel, + node->get_rounding_type(), + node->get_auto_pad()); +} + +template <> +std::shared_ptr convert(const ngraph::Output & data, std::shared_ptr node, ngraph::NodeVector & new_ops) { + // Update Pooling attributes with additional dimension + auto new_strides = node->get_strides(); + auto new_pads_begin = node->get_pads_begin(); + auto new_pad_end = node->get_pads_end(); + auto new_kernel = node->get_kernel(); + + new_strides.insert(new_strides.begin(), 1); + 
new_pads_begin.insert(new_pads_begin.begin(), 0); + new_pad_end.insert(new_pad_end.begin(), 0); + new_kernel.insert(new_kernel.begin(), 1); + + return std::make_shared(data, + new_strides, + new_pads_begin, + new_pad_end, + new_kernel, + node->get_exclude_pad(), + node->get_rounding_type(), + node->get_auto_pad()); +} + +ngraph::matcher_pass_callback get_callback() { + return [](ngraph::pattern::Matcher& m) { + auto node = m.get_match_root(); + if (node->input(0).get_partial_shape().rank().get_length() != 3) { + return false; + } + + // Insert H dimension equal to 1 + auto input_shape = node->input(0).get_shape(); + auto output_shape = node->output(0).get_shape(); + + input_shape.insert(input_shape.begin() + 2, 1); + + ngraph::NodeVector new_ops; + + // Reshape(input_shape)->Op->Reshape(output_shape) + ngraph::Output last = ngraph::op::util::reshapeTo(node->input_value(0), input_shape); + last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name() + "/reshape_begin"); + new_ops.push_back(last.get_node_shared_ptr()); + + if (auto conv = std::dynamic_pointer_cast(node)) { + last = convert(last, conv, new_ops); + } else if (auto group_conv = std::dynamic_pointer_cast(node)) { + last = convert(last, group_conv, new_ops); + } else if (auto max_pool = std::dynamic_pointer_cast(node)) { + last = convert(last, max_pool, new_ops); + } else if (auto avg_pool = std::dynamic_pointer_cast(node)) { + last = convert(last, avg_pool, new_ops); + } else { + throw ngraph::ngraph_error("Reshape1DOps: op type is not supported"); + } + + last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name() + "/new"); + new_ops.push_back(last.get_node_shared_ptr()); + + last = ngraph::op::util::reshapeTo(last, output_shape); + last.get_node_shared_ptr()->set_friendly_name(node->get_friendly_name()); + new_ops.push_back(last.get_node_shared_ptr()); + + ngraph::copy_runtime_info(node, new_ops); + node->output(0).replace(last); + return true; + }; +} + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DConvolution, "Reshape1DConvolution", 0); + +MKLDNNPlugin::Reshape1DConvolution::Reshape1DConvolution() { + auto conv = ngraph::pattern::wrap_type(ngraph::pattern::has_static_shape()); + auto m = std::make_shared(conv, "Reshape1DConvolution"); + this->register_matcher(m, get_callback()); +} + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DGroupConvolution, "Reshape1DGroupConvolution", 0); + +MKLDNNPlugin::Reshape1DGroupConvolution::Reshape1DGroupConvolution() { + auto group_conv = ngraph::pattern::wrap_type(ngraph::pattern::has_static_shape()); + auto m = std::make_shared(group_conv, "Reshape1DGroupConvolution"); + this->register_matcher(m, get_callback()); +} + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DAvgPool, "Reshape1DAvgPool", 0); + +MKLDNNPlugin::Reshape1DAvgPool::Reshape1DAvgPool() { + auto pool = ngraph::pattern::wrap_type(ngraph::pattern::has_static_shape()); + auto m = std::make_shared(pool, "Reshape1DAvgPool"); + this->register_matcher(m, get_callback()); +} + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::Reshape1DMaxPool, "Reshape1DMaxPool", 0); + +MKLDNNPlugin::Reshape1DMaxPool::Reshape1DMaxPool() { + auto pool = ngraph::pattern::wrap_type(ngraph::pattern::has_static_shape()); + auto m = std::make_shared(pool, "Reshape1DMaxPool"); + this->register_matcher(m, get_callback()); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_1d_ops.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_1d_ops.hpp new file mode 100644 index 00000000000..04e734f7638 --- 
/dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_1d_ops.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class Reshape1DConvolution: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Reshape1DConvolution(); +}; + +class Reshape1DGroupConvolution: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Reshape1DGroupConvolution(); +}; + +class Reshape1DAvgPool: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Reshape1DAvgPool(); +}; + +class Reshape1DMaxPool: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Reshape1DMaxPool(); +}; + +} // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp new file mode 100644 index 00000000000..09d3e7e0554 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reshape_fc_fusion.hpp" +#include "op/fully_connected.hpp" +#include +#include +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnectedFusion, "ReshapeFullyConnectedFusion", 0); + +MKLDNNPlugin::ReshapeFullyConnectedFusion::ReshapeFullyConnectedFusion() { + auto m_reshape = ngraph::pattern::wrap_type(ngraph::pattern::has_static_shape()); + ngraph::OutputVector twoInputs = {m_reshape, ngraph::pattern::any_input()}; + ngraph::OutputVector threeInputs = {m_reshape, ngraph::pattern::any_input(), ngraph::pattern::any_input()}; + auto fcTwoInputs = ngraph::pattern::wrap_type(twoInputs, ngraph::pattern::has_static_shape()); + auto fcThreeInputs = ngraph::pattern::wrap_type(threeInputs, ngraph::pattern::has_static_shape()); + const auto fcTwoOrThreeInputs = std::make_shared(ngraph::OutputVector{fcTwoInputs, fcThreeInputs}); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) { + auto fc = std::dynamic_pointer_cast(m.get_match_root()); + auto reshape = std::dynamic_pointer_cast(fc->get_input_node_shared_ptr(0)); + + // Check that Reshape reshapes 4D tensor to 2D or input shape = output shape + auto shape_in = reshape->input_value(0).get_shape(); + auto shape_out = reshape->get_shape(); + if (!((shape_in.size() == 4 && reshape->get_shape().size() == 2) || (shape_in == shape_out && !shape_in.empty()))) { + return false; + } + + // Check that Weights[O, C*H*W] consistent with Input[N, C, H, W] + auto shape_w = fc->input_value(1).get_shape(); + if (shape_in[0] != shape_out[0] || std::accumulate(shape_in.begin() + 1, shape_in.end(), size_t{1}, std::multiplies()) != shape_w[1]) { + return false; + } + + ngraph::NodeVector new_ops; + auto weightInput = fc->input(1).get_source_output(); + ngraph::Shape newWeightsShape; + const auto outShape = fc->get_shape(); + if (shape_in.size() == 3) { + newWeightsShape = ngraph::Shape({outShape[2], shape_in[2]}); + } else { + newWeightsShape.push_back(outShape[1]); + for (int i = 1; i < shape_in.size(); i++) + newWeightsShape.push_back(shape_in[i]); + } + + if (newWeightsShape != weightInput.get_shape()) { + auto newShape = std::make_shared(ngraph::element::i64, ngraph::Shape{newWeightsShape.size()}, newWeightsShape); + weightInput = 
std::make_shared(weightInput, newShape, true); + new_ops.push_back(weightInput.get_node_shared_ptr()); + } + + std::shared_ptr new_fc; + if (fc->get_input_size() == 2) { + new_fc = std::make_shared(reshape->input_value(0), + weightInput, + outShape, + fc->output(0).get_element_type()); + } else if (fc->get_input_size() == 3) { + new_fc = std::make_shared(reshape->input_value(0), + weightInput, + fc->input_value(2), + outShape, + fc->output(0).get_element_type()); + } + new_ops.push_back(new_fc); + new_fc->set_friendly_name(fc->get_friendly_name()); + ngraph::copy_runtime_info({reshape, fc}, new_ops); + ngraph::replace_node(fc, new_fc); + return true; + }; + + auto m = std::make_shared(fcTwoOrThreeInputs, "ReshapeFullyConnectedFusion"); + register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.hpp new file mode 100644 index 00000000000..ab60fd19847 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fc_fusion.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class ReshapeFullyConnectedFusion : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ReshapeFullyConnectedFusion(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp new file mode 100644 index 00000000000..999d1b958d8 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reshape_fully_connected.hpp" +#include "op/fully_connected.hpp" +#include +#include +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnected, "ReshapeFullyConnected", 0); + +MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() { + ngraph::OutputVector twoInputs = {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), ngraph::pattern::any_input()}; + ngraph::OutputVector threeInputs = {ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), ngraph::pattern::any_input(), + ngraph::pattern::any_input()}; + auto fcTwoInputs = ngraph::pattern::wrap_type(twoInputs, ngraph::pattern::has_static_shape()); + auto fcThreeInputs = ngraph::pattern::wrap_type(threeInputs, ngraph::pattern::has_static_shape()); + const auto fcTwoOrThreeInputs = std::make_shared(ngraph::OutputVector{fcTwoInputs, fcThreeInputs}); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { + auto fc = std::dynamic_pointer_cast (m.get_match_root()); + if (!fc || transformation_callback(fc)) { + return false; + } + + auto input_shape = fc->input_value(0).get_shape(); + auto output_shape = fc->get_shape(); + + if (input_shape.size() == 2) { + return false; + } + + ngraph::NodeVector new_ops; + + std::vector reshape_shape{-1, static_cast(input_shape.back())}; + auto reshape = std::make_shared(fc->input_value(0), + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, reshape_shape), true); + new_ops.push_back(reshape); + + reshape->set_friendly_name(fc->get_friendly_name() + "/Reshape"); + + // Calculate output shape for new FullyConnected layer + // [I, K] 
* [O, K] = [I, O] + auto I = reshape->get_shape()[0]; + auto O = fc->input_value(1).get_shape()[0]; + ngraph::Shape output_shape_new{I, O}; + + std::shared_ptr fc_new; + if (fc->get_input_size() == 2) { + fc_new = std::make_shared(reshape, + fc->input_value(1), + output_shape_new, + fc->get_output_type()); + } else if (fc->get_input_size() == 3) { + fc_new = std::make_shared(reshape, + fc->input_value(1), + fc->input_value(2), + output_shape_new, + fc->get_output_type()); + } + new_ops.push_back(fc_new); + + if (output_shape != output_shape_new) { + auto reshape_output = ngraph::op::util::reshapeTo(fc_new, output_shape); + new_ops.push_back(reshape_output); + reshape_output->set_friendly_name(fc->get_friendly_name()); + fc_new->set_friendly_name(fc->get_friendly_name() + "/FC"); + ngraph::copy_runtime_info(fc, new_ops); + ngraph::replace_node(fc, reshape_output); + } else { + fc_new->set_friendly_name(fc->get_friendly_name()); + ngraph::copy_runtime_info(fc, new_ops); + ngraph::replace_node(fc, fc_new); + } + + return true; + }; + + auto m = std::make_shared(fcTwoOrThreeInputs, "ReshapeFullyConnected"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.hpp new file mode 100644 index 00000000000..162427de5de --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +/* + * Description: + * ReshapeFullyConnected transformation detects FullyConnected operations + * and for each operation where input shape is greater than 2 inserts Reshape + * operations before and after FullyConnected operation. This transformation is + * required because of IE restrictions. + */ + +namespace MKLDNNPlugin { + +class ReshapeFullyConnected: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ReshapeFullyConnected(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp new file mode 100644 index 00000000000..0cc1a33cbc3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reshape_prelu.hpp" + +#include +#include +#include +#include "transformations/utils/utils.hpp" + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapePRelu, "ReshapePRelu", 0); + +MKLDNNPlugin::ReshapePRelu::ReshapePRelu() { + auto prelu = ngraph::pattern::wrap_type({ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), + ngraph::pattern::any_input(ngraph::pattern::has_static_shape())}); + + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { + auto prelu = std::dynamic_pointer_cast(m.get_match_root()); + if (!prelu || ngraph::shape_size(prelu->get_input_shape(1)) == 1 || prelu->get_input_shape(1).size() != 1) { + return false; + } + ngraph::Shape new_shape(prelu->input_value(0).get_shape().size(), 1); + new_shape[new_shape.size() > 1 ? 
1 : 0] = prelu->input_value(1).get_shape()[0]; + auto slope = ngraph::op::util::reshapeTo(prelu->input_value(1), new_shape); + auto new_prelu = std::make_shared(prelu->input(0).get_source_output(), slope); + new_prelu->set_friendly_name(prelu->get_friendly_name()); + ngraph::copy_runtime_info(prelu, new_prelu); + ngraph::replace_node(prelu, new_prelu); + return true; + }; + + auto m = std::make_shared(prelu, "ReshapePRelu"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.hpp new file mode 100644 index 00000000000..c448624e6ed --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_prelu.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class ReshapePRelu: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ReshapePRelu(); +}; + +} // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp new file mode 100644 index 00000000000..196af3640bd --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rnn_sequences_optimization.hpp" +#include +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeGRUSequenceTransposes, "OptimizeGRUSequenceTransposes", 0); +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeLSTMSequenceTransposes, "OptimizeLSTMSequenceTransposes", 0); +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeRNNSequenceTransposes, "OptimizeRNNSequenceTransposes", 0); + +namespace { + int64_t getSeqAxis(const std::shared_ptr& sequenceOp) { + // Optimization. + // Plug-ins support seqAxis attribute (value 1 or 0) for Seq ops, but according to the spec we don't + // support this attribute and should insert Transpose layer before and after Seq op in TI to Sequences + // transformation. Additional Transpose layers affect the performance, so we try to detect pattern + // Transpose(axis_order={1,0,2}) -> Seq -> Transpose(axis_order={2,1,0,3} + // and replace unnecessary Transpose ops with SeqIE (seqAxis = 0) to transfer value + // of the attribute to plug-ins. + // todo: specify seqAxis attribute for Sequence ops. 
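+ // In other words: when the surrounding Transposes match the reference orders exactly, they only
+ // shuffle data between batch-major and sequence-major layouts, so the transform below replaces
+ // them with Reshapes and reports seqAxis = 0 to the plugin via the node's runtime info.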
+ int64_t seqAxis = 1; // default + const auto& target_inputs = sequenceOp->output(0).get_target_inputs(); + if (target_inputs.size() == 1) { + const auto& transpose_before = std::dynamic_pointer_cast(sequenceOp->input_value(0).get_node_shared_ptr()); + const auto& transpose_after = std::dynamic_pointer_cast(target_inputs.begin()->get_node()->shared_from_this()); + if (transpose_after != nullptr && transpose_before != nullptr) { + auto order_before = std::dynamic_pointer_cast( + transpose_before->input_value(1).get_node_shared_ptr()); + auto order_after = std::dynamic_pointer_cast( + transpose_after->input_value(1).get_node_shared_ptr()); + if (order_before != nullptr && order_after != nullptr) { + auto order_before_values = order_before->cast_vector(); + auto order_after_values = order_after->cast_vector(); + std::vector order_ref_before = {1, 0, 2}; + std::vector order_ref_after = {2, 1, 0, 3}; + if (order_before_values == order_ref_before && order_after_values == order_ref_after) { + seqAxis = 0; + } + } + } + } + return seqAxis; + } + + bool transform(const std::shared_ptr& sequenceOp) { + // Detect pattern: Transpose_before -> Seq -> Transpose_after + auto seqAxis = getSeqAxis(sequenceOp); + if (seqAxis == 0) { + ngraph::Output in_0 = sequenceOp->get_input_source_output(0).get_node_shared_ptr()->get_input_source_output(0); + + auto newInShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, sequenceOp->get_input_shape(0)); + auto reshape1 = std::make_shared(in_0, newInShape, false); + ngraph::replace_node(sequenceOp->get_input_node_shared_ptr(0), {reshape1->output(0)}); + + const auto &gruTargetInputs = sequenceOp->output(0).get_target_inputs(); + if (gruTargetInputs.empty()) + return false; + auto transposeAfter = gruTargetInputs.begin()->get_node()->shared_from_this(); + + auto newOutShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{4}, transposeAfter->get_output_shape(0)); + auto reshape2 = std::make_shared(sequenceOp->output(0), newOutShape, false); + reshape2->set_friendly_name(transposeAfter->get_friendly_name()); + ngraph::replace_node(transposeAfter, {reshape2->output(0)}); + } else { + auto originShape = sequenceOp->get_output_shape(0); + const auto targetInputs = sequenceOp->get_output_target_inputs(0); + if (targetInputs.empty()) { + return false; + } + auto seqOut = targetInputs.begin()->get_node()->shared_from_this(); + + auto tncShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, {originShape[2], originShape[0], originShape[3]}); + auto reshape1 = std::make_shared(sequenceOp->output(0), tncShape, false); + + auto order = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, {1, 0, 2}); + auto transpose = std::make_shared(reshape1->output(0), order); + + auto ndtcShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{4}, originShape); + auto reshape2 = std::make_shared(transpose->output(0), ndtcShape, false); + reshape2->set_friendly_name(sequenceOp->get_friendly_name()+".0"); + + ngraph::insert_new_node_between(sequenceOp, seqOut, reshape2); + } + + sequenceOp->get_rt_info()["seqAxis"] = std::make_shared>(seqAxis); + + return true; + } +} // namespace + +MKLDNNPlugin::OptimizeGRUSequenceTransposes::OptimizeGRUSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto gruSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!gruSequence) { + return false; + } + // Bidirectional cases are not 
supported + if (gruSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(gruSequence); + }; + + auto gruSequenceNgraph = ngraph::pattern::wrap_type(); + + auto m = std::make_shared(gruSequenceNgraph, "OptimizeGRUSequenceTransposes"); + this->register_matcher(m, callback); +} + +MKLDNNPlugin::OptimizeRNNSequenceTransposes::OptimizeRNNSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto rnnSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!rnnSequence) { + return false; + } + // Bidirectional cases are not supported + if (rnnSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(rnnSequence); + }; + + auto rnnSequenceNgraph = ngraph::pattern::wrap_type(); + + auto m = std::make_shared(rnnSequenceNgraph, "OptimizeRNNSequenceTransposes"); + this->register_matcher(m, callback); +} + +MKLDNNPlugin::OptimizeLSTMSequenceTransposes::OptimizeLSTMSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto lstmSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!lstmSequence) { + return false; + } + // Bidirectional cases are not supported + if (lstmSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(lstmSequence); + }; + + auto lstmSequenceNgraph_0 = ngraph::pattern::wrap_type(); + auto lstmSequenceNgraph_5 = ngraph::pattern::wrap_type(); + const auto lstmSeqInputs = std::make_shared(ngraph::OutputVector{lstmSequenceNgraph_0, lstmSequenceNgraph_5}); + + auto m = std::make_shared(lstmSeqInputs, "OptimizeLSTMSequenceTransposes"); + + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp new file mode 100644 index 00000000000..14cf1a585af --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class OptimizeGRUSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeGRUSequenceTransposes(); +}; + +class OptimizeLSTMSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeLSTMSequenceTransposes(); +}; + +class OptimizeRNNSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeRNNSequenceTransposes(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp deleted file mode 100644 index 515bd288a13..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include "argmax_imp.hpp" - -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class ArgMaxImpl: public ExtLayerBase { -public: - explicit ArgMaxImpl(const CNNLayer* layer) { - try { - if (layer->insData.size() != 1 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; - - 
conf.out_max_val_ = layer->GetParamAsBool("out_max_val", false); - conf.top_k_ = layer->GetParamAsInt("top_k"); - - conf.has_axis_ = (layer->params.find("axis") != layer->params.end()); - conf.axis_index_ = conf.has_axis_ ? - std::stoi(layer->params.at("axis")) :0; - - addConfig(layer, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - SizeVector in_dims = inputs[0]->getTensorDesc().getDims(); - - float* src_data = inputs[0]->buffer(); - float* dst_data = outputs[0]->buffer(); - - XARCH::arg_max_execute(src_data, dst_data, in_dims, conf); - return OK; - } - -private: - argmax_conf conf; -}; - -REG_FACTORY_FOR(ArgMaxImpl, ArgMax); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp b/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp deleted file mode 100644 index 2c07f2d42de..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp +++ /dev/null @@ -1,417 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "argmax_imp.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) -#include -#include "nodes/common/uni_simd.h" -#endif - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { -namespace XARCH { - -using Shape = std::vector; - -#if defined(HAVE_AVX512F) - constexpr int count_vec = 32; -#elif defined(HAVE_SSE) || defined(HAVE_AVX2) - constexpr int count_vec = 16; -#endif - -inline int count(Shape dims, size_t start_ind, size_t end_ind) { - size_t count = 1; - for (size_t i = start_ind; i < end_ind; i++) - count *= dims[i]; - return static_cast(count); -} - -inline int count(Shape dims, size_t start_ind = 0) { - return count(dims, start_ind, dims.size()); -} - -template -void argmax_one_class_has_axis(const float* src_data, float* dst_data, Shape in_dims, argmax_conf& conf) { - const auto axis_index_ = conf.axis_index_; - int axis_ = (axis_index_ < 0) ? 
axis_index_ + static_cast(in_dims.size()) : axis_index_; - const int dim = static_cast(in_dims[axis_]); - int before_num = count(in_dims, 0, axis_); - int after_num = count(in_dims, axis_ + 1, in_dims.size()); - int first_index = 0; -#if defined(HAVE_AVX512F) - const int block_size = 16; - typedef __m512 vec_type_f; - typedef __m512i vec_type_i; - typedef __mmask16 vmask_type; -#elif defined(HAVE_AVX2) - const int block_size = 8; - typedef __m256 vec_type_f; - typedef __m256i vec_type_i; - typedef __m256 vmask_type; -#elif defined(HAVE_SSE) - const int block_size = 4; - typedef __m128 vec_type_f; - typedef __m128i vec_type_i; - typedef __m128 vmask_type; -#endif - -#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) - parallel_for2d(before_num, after_num / block_size, [&](int i0, int ib1) { - int s_index = i0 * dim * after_num + ib1 * block_size; - vec_type_f vmax_val = _mm_uni_loadu_ps(src_data + s_index); - vec_type_i vindex_max_val = _mm_uni_setzero_si(); - for (int i2 = 1; i2 < dim; i2++) { - s_index += after_num; - vec_type_f vsrc = _mm_uni_loadu_ps(src_data + s_index); - vmask_type vmask = _mm_uni_cmpgt_ps(vsrc, vmax_val); - vmax_val = _mm_uni_blendv_ps(vmax_val, vsrc, vmask); - if (!out_max_val) { - vec_type_i vindex_cur_val = _mm_uni_set1_epi32(i2); -#if defined(HAVE_AVX512F) - vindex_max_val = _mm512_mask_blend_epi32(vmask, vindex_max_val, vindex_cur_val); -#else - vindex_max_val = _mm_uni_blendv_epi8(vindex_max_val, vindex_cur_val, _mm_uni_castps_si(vmask)); -#endif - } - } - if (!out_max_val) { - vec_type_f vindex_max_val_fp32 = _mm_uni_cvtepi32_ps(vindex_max_val); - _mm_uni_storeu_ps(dst_data + i0 * after_num + ib1 * block_size, vindex_max_val_fp32); - } else { - _mm_uni_storeu_ps(dst_data + i0 * after_num + ib1 * block_size, vmax_val); - } - }); - first_index = after_num / block_size * block_size; -#endif - int rest = after_num - first_index; - parallel_for2d(before_num, rest, [&](int i0, int i1) { - int index_max_val = 0; - int s_index = i0 * dim * after_num + first_index + i1; - float max_val = src_data[s_index]; - for (int i2 = 1; i2 < dim; i2++) { - s_index += after_num; - if (src_data[s_index] > max_val) { - max_val = src_data[s_index]; - if (!out_max_val) { - index_max_val = i2; - } - } - } - if (!out_max_val) - dst_data[i0 * after_num + first_index + i1] = static_cast(index_max_val); - else - dst_data[i0 * after_num + first_index + i1] = max_val; - }); -} - -template -void argmax_one_class(const float* src_data, float* dst_data, Shape in_dims) { - const int dim = count(in_dims, 1); - int before_num = in_dims[0]; - parallel_for(before_num, [&](int i0) { - int index_max_val = 0; - int s_index = i0 * dim; - float max_val = src_data[s_index]; - for (int i1 = 1; i1 < dim; i1++) { - s_index++; - if (src_data[s_index] > max_val) { - max_val = src_data[s_index]; - index_max_val = i1; - } - } - if (!out_max_val) { - dst_data[i0] = static_cast(index_max_val); - } else { - dst_data[i0 * 2] = static_cast(index_max_val); - dst_data[i0 * 2 + 1] = max_val; - } - }); -} - -template -void argmax_many_classes_has_axis(const float* src_data, float* dst_data, Shape in_dims, argmax_conf& conf) { - const auto axis_index_ = conf.axis_index_; - const auto top_k_ = conf.top_k_; - int axis_ = (axis_index_ < 0) ? 
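All of the removed argmax kernels index the input as a flattened [before_num, dim, after_num] volume around the reduction axis, so moving one step along the axis means adding after_num to the flat offset. A standalone scalar sketch of that indexing follows; it mirrors the non-vectorized fallback above and is not a drop-in replacement.

// Standalone sketch of the indexing scheme shared by the kernels above:
// stepping along the reduction axis adds after_num to the flat offset.
static void argmax_along_axis(const float* src, float* dst,
                              int before_num, int dim, int after_num) {
    for (int i0 = 0; i0 < before_num; ++i0) {
        for (int i1 = 0; i1 < after_num; ++i1) {
            int s_index = i0 * dim * after_num + i1;
            float max_val = src[s_index];
            int index_max_val = 0;
            for (int i2 = 1; i2 < dim; ++i2) {
                s_index += after_num;  // next element along the reduction axis
                if (src[s_index] > max_val) {
                    max_val = src[s_index];
                    index_max_val = i2;
                }
            }
            dst[i0 * after_num + i1] = static_cast<float>(index_max_val);
        }
    }
}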
axis_index_ + static_cast(in_dims.size()) : axis_index_; - const int dim = static_cast(in_dims[axis_]); - int before_num = count(in_dims, 0, axis_); - int after_num = count(in_dims, axis_ + 1, in_dims.size()); - int first_index = 0; -#if defined(HAVE_AVX512F) - const int block_size = 16; - typedef __m512 vec_type_f; - typedef __m512i vec_type_i; - typedef __mmask16 vmask_type; -#elif defined(HAVE_AVX2) - const int block_size = 8; - typedef __m256 vec_type_f; - typedef __m256i vec_type_i; - typedef __m256 vmask_type; -#elif defined(HAVE_SSE) - const int block_size = 4; - typedef __m128 vec_type_f; - typedef __m128i vec_type_i; - typedef __m128 vmask_type; -#endif - -#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) - if (top_k_ < count_vec) { - parallel_for2d(before_num, after_num / block_size, [&](int i0, int ib1) { -#if defined(HAVE_AVX512F) - const int N = 32; - vec_type_f vmax_values[N]; - vec_type_i vmax_indexes[N]; -#else - const int N = 16; - vec_type_f vmax_values[N]; - vec_type_i vmax_indexes[N]; -#endif - vec_type_f vtmp; - vec_type_i vtmp_indexes; - vmask_type vmask; - int s_index = i0 * dim * after_num + ib1 * block_size; - - auto vswap_func = [&](int index1, int index2) { - vtmp = vmax_values[index1]; - vmax_values[index1] = _mm_uni_blendv_ps(vmax_values[index1], vmax_values[index2], vmask); - vmax_values[index2] = _mm_uni_blendv_ps(vmax_values[index2], vtmp, vmask); - if (!out_max_val) { - vtmp_indexes = vmax_indexes[index1]; -#if defined(HAVE_AVX512F) - vmax_indexes[index1] = _mm512_mask_blend_epi32(vmask, vmax_indexes[index1], vmax_indexes[index2]); - vmax_indexes[index2] = _mm512_mask_blend_epi32(vmask, vmax_indexes[index2], vtmp_indexes); -#else - vmax_indexes[index1] = _mm_uni_blendv_epi8(vmax_indexes[index1], vmax_indexes[index2], _mm_uni_castps_si(vmask)); - vmax_indexes[index2] = _mm_uni_blendv_epi8(vmax_indexes[index2], vtmp_indexes, _mm_uni_castps_si(vmask)); -#endif - } - }; - - for (int i2 = 0; i2 < top_k_; i2++) { - vmax_values[i2] = _mm_uni_loadu_ps(src_data + s_index); - if (!out_max_val) { - vmax_indexes[i2] = _mm_uni_set1_epi32(i2); - } - s_index += after_num; - } - for (int i2 = 0; i2 < top_k_ - 1; i2++) { - for (int i3 = top_k_ - 1; i3 > i2; i3--) { - vmask = _mm_uni_cmpgt_ps(vmax_values[i3], vmax_values[i3 - 1]); -#if defined(HAVE_AVX512F) - if (vmask) { - vswap_func(i3, i3 - 1); - } -#else - int swap = _mm_uni_movemask_ps(vmask); - if (swap) { - vswap_func(i3, i3 - 1); - } -#endif - } - } - for (int i2 = top_k_; i2 < dim; i2++) { - vmax_values[top_k_] = _mm_uni_loadu_ps(src_data + s_index); - if (!out_max_val) { - vmax_indexes[top_k_] = _mm_uni_set1_epi32(i2); - } - for (int i3 = top_k_; i3 > 0; i3--) { - vmask = _mm_uni_cmpgt_ps(vmax_values[i3], vmax_values[i3 - 1]); -#if defined(HAVE_AVX512F) - if (vmask) { - vswap_func(i3, i3 - 1); - } else { - break; - } -#else - int swap = _mm_uni_movemask_ps(vmask); - if (swap) { - vswap_func(i3, i3 - 1); - } else { - break; - } -#endif - } - s_index += after_num; - } - for (int i2 = 0; i2 < top_k_; i2++) { - if (!out_max_val) { - _mm_uni_storeu_ps(dst_data + (i0 * top_k_ + i2) * after_num + ib1 * block_size, - _mm_uni_cvtepi32_ps(vmax_indexes[i2])); - } else { - _mm_uni_storeu_ps(dst_data + (i0 * top_k_ + i2) * after_num + ib1 * block_size, vmax_values[i2]); - } - } - }); - first_index = after_num / block_size * block_size; - } -#endif - int rest = after_num - first_index; - parallel_for2d(before_num, rest, [&](int i0, int i1) { - std::vector max_values(top_k_ + 1); - std::vector 
max_indexes(top_k_ + 1); - float tmp_value; - int tmp_index; - int s_index = i0 * dim * after_num + first_index + i1; - - auto swap_func = [&](int index1, int index2) { - tmp_value = max_values[index1]; - max_values[index1] = max_values[index2]; - max_values[index2] = tmp_value; - if (!out_max_val) { - tmp_index = max_indexes[index1]; - max_indexes[index1] = max_indexes[index2]; - max_indexes[index2] = tmp_index; - } - }; - - for (int i2 = 0; i2 < top_k_; i2++) { - max_values[i2] = src_data[s_index]; - if (!out_max_val) { - max_indexes[i2] = i2; - } - s_index += after_num; - } - for (int i2 = 0; i2 < top_k_ - 1; i2++) { - for (int i3 = top_k_ - 1; i3 > i2; i3--) { - if (max_values[i3] > max_values[i3 - 1]) { - swap_func(i3, i3 - 1); - } - } - } - for (int i2 = top_k_; i2 < dim; i2++) { - max_values[top_k_] = src_data[s_index]; - if (!out_max_val) { - max_indexes[top_k_] = i2; - } - for (int i3 = top_k_; i3 > 0; i3--) { - if (max_values[i3] > max_values[i3 - 1]) { - swap_func(i3, i3 - 1); - } else { - break; - } - } - s_index += after_num; - } - for (int i2 = 0; i2 < top_k_; i2++) { - if (!out_max_val) { - dst_data[i0 * top_k_ * after_num + i2 * after_num + first_index + i1] = static_cast(max_indexes[i2]); - } else { - dst_data[i0 * top_k_ * after_num + i2 * after_num + first_index + i1] = max_values[i2]; - } - } - }); -} - -template -void argmax_many_classes(const float* src_data, float* dst_data, Shape in_dims, argmax_conf& conf) { - const int dim = count(in_dims, 1); - auto top_k_ = conf.top_k_; - int before_num = in_dims[0]; - parallel_for(before_num, [&](int i0) { - std::vector max_values(top_k_ + 1); - std::vector max_indexes(top_k_ + 1); - float tmp_value; - int tmp_index; - int s_index = i0 * dim; - - auto swap_func = [&](int index1, int index2) { - tmp_value = max_values[index1]; - max_values[index1] = max_values[index2]; - max_values[index2] = tmp_value; - - tmp_index = max_indexes[index1]; - max_indexes[index1] = max_indexes[index2]; - max_indexes[index2] = tmp_index; - }; - - for (int i2 = 0; i2 < top_k_; i2++) { - max_values[i2] = src_data[s_index]; - max_indexes[i2] = i2; - s_index++; - } - for (int i2 = 0; i2 < top_k_ - 1; i2++) { - for (int i3 = top_k_ - 1; i3 > i2; i3--) { - if (max_values[i3] > max_values[i3 - 1]) { - swap_func(i3, i3 - 1); - } - } - } - for (int i2 = top_k_; i2 < dim; i2++) { - max_values[top_k_] = src_data[s_index]; - max_indexes[top_k_] = i2; - for (int i3 = top_k_; i3 > 0; i3--) { - if (max_values[i3] > max_values[i3 - 1]) { - swap_func(i3, i3 - 1); - } else { - break; - } - } - s_index++; - } - for (int i2 = 0; i2 < top_k_; i2++) { - if (!out_max_val) { - dst_data[i0 * top_k_ + i2] = static_cast(max_indexes[i2]); - } else { - dst_data[i0 * 2 * top_k_ + i2] = static_cast(max_indexes[i2]); - dst_data[i0 * 2 * top_k_ + top_k_ + i2] = max_values[i2]; - } - } - }); -} - -void arg_max_execute(const float* input, float *output, std::vector dims, argmax_conf& conf) { - Shape in_dims = dims; - - const float* src_data = input; - float* dst_data = output; - - auto top_k_ = conf.top_k_; - auto has_axis_ = conf.has_axis_; - auto out_max_val_ = conf.out_max_val_; - - if (top_k_ == 1) { - if (has_axis_) { - if (out_max_val_) { - argmax_one_class_has_axis(src_data, dst_data, in_dims, conf); - } else { - argmax_one_class_has_axis(src_data, dst_data, in_dims, conf); - } - } else { - if (out_max_val_) { - argmax_one_class(src_data, dst_data, in_dims); - } else { - argmax_one_class(src_data, dst_data, in_dims); - } - } - } else { - if (has_axis_) { - if (out_max_val_) { 
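The many-classes variants keep the running top_k candidates in two parallel arrays of length top_k + 1: the arrays are seeded with the first top_k elements and sorted in descending order, then every further element is written into the spare slot and bubbled towards the front until it stops winning comparisons. A standalone sketch of that selection scheme (assumes n >= top_k):

#include <utility>
#include <vector>

// Keep the current best top_k values sorted descending; append each new
// candidate at position top_k and bubble it up, stopping early once it no
// longer beats its left neighbour.
static std::vector<std::pair<float, int>> top_k_descending(const float* src, int n, int top_k) {
    std::vector<float> max_values(top_k + 1);
    std::vector<int> max_indexes(top_k + 1);
    for (int i = 0; i < top_k; ++i) {
        max_values[i] = src[i];
        max_indexes[i] = i;
    }
    for (int i = 0; i < top_k - 1; ++i)        // initial descending sort of the seed
        for (int j = top_k - 1; j > i; --j)
            if (max_values[j] > max_values[j - 1]) {
                std::swap(max_values[j], max_values[j - 1]);
                std::swap(max_indexes[j], max_indexes[j - 1]);
            }
    for (int i = top_k; i < n; ++i) {          // stream in the remaining elements
        max_values[top_k] = src[i];
        max_indexes[top_k] = i;
        for (int j = top_k; j > 0; --j) {
            if (max_values[j] > max_values[j - 1]) {
                std::swap(max_values[j], max_values[j - 1]);
                std::swap(max_indexes[j], max_indexes[j - 1]);
            } else {
                break;
            }
        }
    }
    std::vector<std::pair<float, int>> result(top_k);
    for (int i = 0; i < top_k; ++i)
        result[i] = {max_values[i], max_indexes[i]};
    return result;
}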
- argmax_many_classes_has_axis(src_data, dst_data, in_dims, conf); - } else { - argmax_many_classes_has_axis(src_data, dst_data, in_dims, conf); - } - } else { - if (out_max_val_) { - argmax_many_classes(src_data, dst_data, in_dims, conf); - } else { - argmax_many_classes(src_data, dst_data, in_dims, conf); - } - } - } -} - -} // namespace XARCH -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.hpp b/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.hpp deleted file mode 100644 index d447e1a4e06..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.hpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -struct argmax_conf { - bool out_max_val_; - int top_k_; - bool has_axis_; - int axis_index_; -}; - -namespace XARCH { - -void arg_max_execute(const float* inputs, float *outputs, std::vector dims, argmax_conf& conf); - -} // namespace XARCH - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/base.hpp b/inference-engine/src/mkldnn_plugin/nodes/base.hpp index 7720ad36762..b611c8eb0a4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/base.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/base.hpp @@ -5,8 +5,11 @@ #pragma once #include -#include #include "nodes/list.hpp" +#include "common/tensor_desc_creator.h" +#include "ngraph/descriptor/tensor.hpp" +#include +#include "cpu_types.h" #include #include @@ -53,99 +56,76 @@ public: } protected: - enum class ConfLayout { ANY, PLN, BLK8, BLK16 }; + MKLDNNPlugin::Algorithm getAlgorithm() const { + return algorithm; + } + MKLDNNPlugin::Algorithm algorithm; class DataConfigurator { public: - explicit DataConfigurator(ConfLayout l): - layout(l) {} + DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, Precision prc = Precision::UNSPECIFIED, bool constant = false, int inplace = -1) : + tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), constant(constant), inplace(inplace) {} - DataConfigurator(ConfLayout l, bool constant, int inplace = -1, Precision::ePrecision prc = Precision::UNSPECIFIED): - layout(l), constant(constant), inplace(inplace), prc(prc) {} + DataConfigurator(const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr& tensorDescCreator, Precision prc = Precision::UNSPECIFIED, + bool constant = false, int inplace = -1) : tensorDescCreator(tensorDescCreator), prc(prc), constant(constant), inplace(inplace) {} - DataConfigurator(ConfLayout l, Precision::ePrecision prc): - layout(l), prc(prc) {} - - ConfLayout layout; - bool constant = false; - int inplace = -1; - Precision::ePrecision prc = Precision::UNSPECIFIED; // by default use the layer precision + const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator; + const bool constant = false; + const int inplace = -1; + const Precision prc = Precision::UNSPECIFIED; // By default ngraph node precision is used + private: + static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) { + auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); + if (creators.find(tensorDescType) == creators.end()) { + IE_THROW() << "Cannot find tensor descriptor creator"; + } + return creators.at(tensorDescType); + } }; - void 
addConfig(const CNNLayer* layer, std::vector in_l, - std::vector out_l, bool dynBatchSupport = false) { + void addConfig(const std::shared_ptr& op, + const std::vector& inDataConfigurators, + const std::vector& outDataConfigurators, + bool dynBatchSupport = false) { LayerConfig config; - if (in_l.size() != layer->insData.size()) - IE_THROW() << "Incorrect number of input edges for layer " << layer->name << ". Expected " << layer->insData.size() - << " but layout specification provided for " << in_l.size(); - if (out_l.size() != layer->outData.size()) - IE_THROW() << "Incorrect number of output edges for layer " << layer->name << ". Expected " << layer->outData.size() - << " but layout specification provided for " << out_l.size(); + if (inDataConfigurators.size() != op->get_input_size()) + IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of inputs: " << + "expected: " << op->get_input_size() << ", provided: " << inDataConfigurators.size(); + if (outDataConfigurators.size() != op->get_output_size()) + IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of outputs: " << + "expected: " << op->get_output_size() << ", provided: " << outDataConfigurators.size(); - // Fill tensor parameters into config - auto fill_port = [] (std::vector& port, DataConfigurator conf, const DataPtr& data) { - auto div_up = [](const int a, const int b) -> int { - if (!b) - return 0; - return (a + b - 1) / b; - }; - if (!data) IE_THROW() << "Cannot get input data!"; + auto fill_port = [] (const DataConfigurator& dataConfigurator, const ngraph::descriptor::Tensor& tensor, std::vector& port) -> bool { + // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by tensorDescCreator. + // This should be suitable for major of scenarios since almost all nodes add `ncsp` tensorDescCreator which supports any shape rank. + if (tensor.get_shape().size() < dataConfigurator.tensorDescCreator->getMinimalRank()) + return false; + + auto precision = dataConfigurator.prc != Precision::UNSPECIFIED ? dataConfigurator.prc : details::convertPrecision(tensor.get_element_type()); DataConfig dataConfig; - dataConfig.inPlace = conf.inplace; - dataConfig.constant = conf.constant; + dataConfig.inPlace = dataConfigurator.inplace; + dataConfig.constant = dataConfigurator.constant; + dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(precision, tensor.get_shape()); - const TensorDesc& data_desc = data->getTensorDesc(); - const SizeVector& data_dims = data_desc.getDims(); - - std::vector blocks = data_dims; - std::vector order(blocks.size()); - for (size_t i = 0; i < order.size(); i++) order[i] = i; - - const bool isInt8 = (data->getPrecision() == Precision::I8 || data->getPrecision() == Precision::U8); - - if (conf.layout == ConfLayout::BLK8 || conf.layout == ConfLayout::BLK16) { - if (data_dims.size() < 4 || data_dims.size() > 5) - IE_THROW() << "Inapplicable blocking layout." - << "Tensor should be 4D or 5D."; - - int blk_size = conf.layout == ConfLayout::BLK8 ? 8 : 16; - - // Blocking through Channel dimension. 
Like [nChwXc] - order.push_back(1); - blocks[1] = div_up(blocks[1], blk_size); - blocks.push_back(blk_size); - } else if (isInt8) { - if (data_dims.size() == 4) { - order = {0, 2, 3, 1}; - blocks = {data_dims[0], data_dims[2], data_dims[3], data_dims[1]}; - } else if (data_dims.size() == 5) { - order = {0, 2, 3, 4, 1}; - blocks = {data_dims[0], data_dims[2], data_dims[3], data_dims[4], data_dims[1]}; - } // all over keep original plain format - - conf.layout = ConfLayout::PLN; - } - - InferenceEngine::Precision precision = (conf.prc == Precision::UNSPECIFIED) ? data_desc.getPrecision() : Precision(conf.prc); - if (conf.layout == ConfLayout::ANY) { - dataConfig.desc = TensorDesc(precision, data_dims, InferenceEngine::Layout::ANY); - } else { - dataConfig.desc = TensorDesc(precision, data_dims, {blocks, order}); - } port.push_back(dataConfig); + + return true; }; - for (size_t i = 0; i < in_l.size(); i++) - fill_port(config.inConfs, in_l[i], layer->insData[i].lock()); + for (size_t i = 0; i < inDataConfigurators.size(); i++) + if (!fill_port(inDataConfigurators[i], op->get_input_tensor(i), config.inConfs)) + return; - for (size_t i = 0; i < out_l.size(); i++) - fill_port(config.outConfs, out_l[i], layer->outData[i]); + for (size_t i = 0; i < outDataConfigurators.size(); i++) + if (!fill_port(outDataConfigurators[i], op->get_output_tensor(i), config.outConfs)) + return; config.dynBatchSupport = dynBatchSupport; confs.push_back(config); } + std::string errorMsg; std::vector confs; }; @@ -153,20 +133,22 @@ protected: template class ImplFactory : public ILayerImplFactory { public: - explicit ImplFactory(const CNNLayer *layer) { - cnnLayer = InferenceEngine::clonelayer(*layer); - cnnLayer->_fusedWith = layer->_fusedWith; - cnnLayer->insData = layer->insData; - cnnLayer->outData = layer->outData; - } + explicit ImplFactory(const std::shared_ptr& op) : ngraphOp(op) {} // First implementation has more priority than next StatusCode getImplementations(std::vector& impls, ResponseDesc *resp) noexcept override { - impls.push_back(ILayerImpl::Ptr(new IMPL(cnnLayer.get()))); + try { + impls.push_back(ILayerImpl::Ptr(new IMPL(ngraphOp))); + } catch (const InferenceEngine::Exception& ex) { + strncpy(resp->msg, ex.what(), sizeof(resp->msg) - 1); + IE_SUPPRESS_DEPRECATED_START + return ex.getStatus() != OK ? 
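With the reworked base class, a node implementation no longer builds TensorDesc objects by hand; it passes one DataConfigurator per port, each built from a TensorDescCreatorTypes value and an optional precision override, and addConfig() derives the descriptors from the ngraph node's shapes. A minimal sketch of a constructor using this helper, written as it would appear inside the same namespaces as the nodes below; the node class and its port layout are hypothetical and only show the call shape.

class MyNodeImpl : public ExtLayerBase {  // hypothetical node, for illustration only
public:
    explicit MyNodeImpl(const std::shared_ptr<ngraph::Node>& op) {
        try {
            // one DataConfigurator per port: tensor-desc creator type plus optional precision override
            addConfig(op,
                      {{MKLDNNPlugin::TensorDescCreatorTypes::ncsp, Precision::FP32},    // input 0
                       {MKLDNNPlugin::TensorDescCreatorTypes::ncsp, Precision::I32}},    // input 1
                      {{MKLDNNPlugin::TensorDescCreatorTypes::ncsp, Precision::FP32}});  // output 0
        } catch (InferenceEngine::Exception& ex) {
            errorMsg = ex.what();
        }
    }
};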
ex.getStatus() : GENERAL_ERROR; + IE_SUPPRESS_DEPRECATED_END + } return OK; } protected: - InferenceEngine::CNNLayerPtr cnnLayer; + const std::shared_ptr ngraphOp; }; #define REG_FACTORY_FOR(__prim, __type) \ diff --git a/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp b/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp deleted file mode 100644 index b33a9cbaee7..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" -#include "ie_parallel.hpp" - -#include -#include -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class BatchToSpaceImpl: public ExtLayerBase { -public: - explicit BatchToSpaceImpl(const CNNLayer *layer) { - try { - const auto batchToSpaceLayer = dynamic_cast(layer); - if (!batchToSpaceLayer) - IE_THROW() << "BatchToSpace layer with name '" << layer->name << "' isn't instance of BatchToSpaceLayer class"; - - if (batchToSpaceLayer->insData.size() != 4) - IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has incorrect number of input edges"; - - if (batchToSpaceLayer->outData.size() != 1) - IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has incorrect number of output edges"; - - auto data = batchToSpaceLayer->insData[0].lock(); - if (!data) - IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has nullable input data"; - - inDims = data->getTensorDesc().getDims(); - if (inDims.size() < 4) - IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' doesn't support dimensions with rank less than 4"; - - if (inDims.size() > 5) - IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' doesn't support dimensions with rank greater than 5"; - - outDims = batchToSpaceLayer->outData[0]->getTensorDesc().getDims(); - if (inDims.size() != outDims.size()) - IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has incorrect number of input/output dimensions"; - - const auto precision = data->getTensorDesc().getPrecision(); - const std::set supported_precision_sizes = {1, 2, 4, 8}; - if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) - IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has unsupported precision: " << precision.name(); - - blockShapeIn = batchToSpaceLayer->_block_shape; - cropsBeginIn = batchToSpaceLayer->_crops_begin; - - auto createConfig = [&](Layout layout) { - LayerConfig config; - // TODO: remove Const layers - for (int i = 0; i < batchToSpaceLayer->insData.size(); i++) { - auto inData = batchToSpaceLayer->insData[i].lock(); - if (!inData) - IE_THROW() << "BatchToSpace layer with name '" << batchToSpaceLayer->name << "' has nullable input data"; - DataConfig inConfig; - if (i == 0) - inConfig.desc = TensorDesc(precision, inData->getTensorDesc().getDims(), layout); - else - inConfig.desc = TensorDesc(inData->getPrecision(), inData->getTensorDesc().getDims(), inData->getTensorDesc().getLayout()); - config.inConfs.push_back(inConfig); - } - - DataConfig outConfig; - outConfig.desc = TensorDesc(precision, outDims, layout); - config.outConfs.push_back(outConfig); - - config.dynBatchSupport = false; - confs.push_back(config); - }; - - createConfig(inDims.size() == 4 ? 
NHWC : NDHWC); - createConfig(TensorDesc::getLayoutByDims(inDims)); - - std::vector> blockConfs { }; - if (inDims[1] % 8 == 0) blockConfs.push_back({ConfLayout::BLK8, ConfLayout::BLK8}); - if (inDims[1] % 16 == 0) blockConfs.push_back({ConfLayout::BLK16, ConfLayout::BLK16}); - for (auto conf : blockConfs) { - addConfig(layer, {DataConfigurator(conf.first, precision), - DataConfigurator(ConfLayout::PLN, batchToSpaceLayer->insData[1].lock()->getPrecision()), - DataConfigurator(ConfLayout::PLN, batchToSpaceLayer->insData[2].lock()->getPrecision()), - DataConfigurator(ConfLayout::PLN, batchToSpaceLayer->insData[3].lock()->getPrecision())}, - {DataConfigurator(conf.second, precision)}); - } - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - StatusCode execute(std::vector &inputs, std::vector &outputs, ResponseDesc *resp) noexcept override { - switch (inputs[0]->getTensorDesc().getPrecision().size()) { - case 1: batchToSpaceKernel::value_type> (inputs, outputs); break; - case 2: batchToSpaceKernel::value_type>(inputs, outputs); break; - case 4: batchToSpaceKernel::value_type>(inputs, outputs); break; - default: { - if (resp) { - std::string errorMsg = "BatchToSpace layer does not support precision '" - + std::string(inputs[0]->getTensorDesc().getPrecision().name()) + "'"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - return GENERAL_ERROR; - } - } - } - return OK; - } - -private: - std::vector getShape5D(const SizeVector &shape) { - std::vector shape5D(5, 1); - for (int i = 0; i < 2; i++) { - shape5D[i] = shape[i]; - shape5D[4 - i] = shape[shape.size() - 1 - i]; - } - shape5D[2] = shape.size() == 5 ? shape[2] : shape5D[2]; - return shape5D; - } - - template - void batchToSpaceKernel(std::vector &inputs, std::vector &outputs) noexcept { - const T *srcData = inputs[0]->cbuffer().as() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - T *dstData = outputs[0]->buffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - const auto layout = inputs[0]->getTensorDesc().getLayout(); - const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; - const auto dimsSize = inDims.size(); - - auto inShape5D = getShape5D(inDims); - auto outShape5D = getShape5D(outDims); - auto blockShape = getShape5D(blockShapeIn); - - if (layout == NHWC || layout == NDHWC) { - inShape5D.push_back(inShape5D[1]); - inShape5D.erase(inShape5D.begin() + 1); - outShape5D.push_back(outShape5D[1]); - outShape5D.erase(outShape5D.begin() + 1); - blockShape.push_back(blockShape[1]); - blockShape.erase(blockShape.begin() + 1); - } - - const size_t blockSize = blocked ? outputs[0]->getTensorDesc().getBlockingDesc().getBlockDims().back() : 1lu; - const size_t blockCountInput = inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1]; - const size_t blockCountOutput = outputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1]; - const auto blockRemainder = inShape5D[1] % blockSize; - const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; - - const size_t inSpatialStep = inShape5D[2] * inShape5D[3] * inShape5D[4]; - const size_t inBatchStep = (blocked ? blockSize * blockCountInput : inShape5D[1]) * inSpatialStep; - - const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4]; - const size_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep; - - size_t channels = (inShape5D[1] / blockSize); - channels = channels == 0 ? 
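getShape5D() above normalises 4D and 5D inputs to a single 5D indexing scheme by copying the two leading and two trailing dimensions and leaving the middle (depth) dimension at 1 for 4D data. A tiny standalone check of that mapping:

#include <cassert>
#include <vector>

// Standalone illustration of the shape normalisation used by the kernel above.
int main() {
    auto to5D = [](const std::vector<size_t>& shape) {
        std::vector<size_t> s(5, 1);
        for (int i = 0; i < 2; ++i) {
            s[i] = shape[i];
            s[4 - i] = shape[shape.size() - 1 - i];
        }
        if (shape.size() == 5) s[2] = shape[2];
        return s;
    };
    assert((to5D({2, 16, 10, 10}) == std::vector<size_t>{2, 16, 1, 10, 10}));     // NCHW -> NC1HW
    assert((to5D({2, 16, 4, 10, 10}) == std::vector<size_t>{2, 16, 4, 10, 10}));  // NCDHW unchanged
    return 0;
}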
1 : channels; - const size_t workAmount = inShape5D[0] * channels; - - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t start(0lu), end(0lu); - splitter(workAmount, nthr, ithr, start, end); - std::vector indxStart(2, 0); - std::vector indxEnd(2, 0); - parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels); - parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels); - std::vector oAdd(5, 1); - std::vector begin(5, 0); - std::vector finish(5, 1); - for (size_t i0 = indxStart[0]; i0 < indxEnd[0] + 1; ++i0) { - int64_t bIdx = i0 / outShape5D[0]; - const size_t srcIdx0 = i0 * inBatchStep; - const size_t dstIdx0 = (i0 - (bIdx * outShape5D[0])) * outBatchStep; - oAdd[4] = bIdx % blockShapeIn[dimsSize - 1] - cropsBeginIn[dimsSize - 1]; - bIdx /= blockShapeIn[dimsSize - 1]; - oAdd[3] = bIdx % blockShapeIn[dimsSize - 2] - cropsBeginIn[dimsSize - 2]; - bIdx /= blockShapeIn[dimsSize - 2]; - oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - cropsBeginIn[2] : 0lu; - bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx; - oAdd[1] = bIdx % blockShapeIn[1] - cropsBeginIn[1]; - if (layout == NHWC || layout == NDHWC) { - oAdd.push_back(oAdd[1]); - oAdd.erase(oAdd.begin() + 1); - } - begin[1] = (blockShape[1] - 1 - oAdd[1]) / blockShape[1] / blockSize; - finish[1] = (outShape5D[1] - 1 - oAdd[1]) / blockShape[1] / blockSize; - begin[2] = (blockShape[2] - 1 - oAdd[2]) / blockShape[2]; - finish[2] = (outShape5D[2] - 1 - oAdd[2]) / blockShape[2]; - begin[3] = (blockShape[3] - 1 - oAdd[3]) / blockShape[3]; - finish[3] = (outShape5D[3] - 1 - oAdd[3]) / blockShape[3]; - begin[4] = (blockShape[4] - 1 - oAdd[4]) / blockShape[4]; - finish[4] = (outShape5D[4] - 1 - oAdd[4]) / blockShape[4]; - const int64_t addTmpOC = blocked ? 0lu : oAdd[1]; - const int64_t addTmpOc = blocked ? oAdd[1] : 0lu; - indxStart[1] = begin[1] > indxStart[1] ? begin[1] : indxStart[1]; - const size_t lastI1 = i0 == indxEnd[0] ? (indxEnd[1] > finish[1] ? finish[1] : indxEnd[1]) : finish[1]; - for (; indxStart[1] < lastI1 + 1; ++indxStart[1]) { - const size_t block = indxStart[1] == finish[1] ? lastBlock : blockSize; - const int64_t tmpOC = indxStart[1] * blockShape[1] + addTmpOC; - const size_t srcIdx1 = srcIdx0 + indxStart[1] * inSpatialStep * blockSize; - const size_t dstIdx1 = dstIdx0 + tmpOC * outSpatialStep * blockSize; - const size_t itEnd = blocked ? ((block - 1) * blockShape[1] + oAdd[1]) / blockSize : 0lu; - for (size_t i2 = begin[2]; i2 < finish[2] + 1; ++i2) { - const int64_t tmpOd = i2 * blockShape[2] + oAdd[2]; - const size_t srcIdx2 = srcIdx1 + i2 * inShape5D[3] * inShape5D[4] * blockSize; - const size_t dstIdx2 = dstIdx1 + tmpOd * outShape5D[3] * outShape5D[4] * blockSize; - for (size_t i3 = begin[3]; i3 < finish[3] + 1; ++i3) { - const int64_t tmpOh = i3 * blockShape[3] + oAdd[3]; - const size_t srcIdx3 = srcIdx2 + i3 * inShape5D[4] * blockSize; - const size_t dstIdx3 = dstIdx2 + tmpOh * outShape5D[4] * blockSize; - for (size_t i4 = begin[4]; i4 < finish[4] + 1; ++i4) { - const int64_t tmpOw = i4 * blockShape[4] + oAdd[4]; - const size_t srcIdx4 = srcIdx3 + i4 * blockSize; - const size_t dstIdx4 = dstIdx3 + tmpOw * blockSize; - for (size_t it = 0; it < itEnd + 1; ++it) { - const size_t i5Begin = it == 0 ? 0 : (it * blockSize - 1 - oAdd[1]) / blockShape[1] + 1; - const size_t i5End = it == itEnd ? 
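parallel_nt() with splitter() above divides the flat work amount into one contiguous range per thread, and parallel_it_init() then converts the range bounds back into the nested (batch, channel-block) counters. The helper below sketches only the splitting idea; it is not the exact ie_parallel implementation.

// Sketch of the work partitioning: nthr contiguous chunks, with the remainder
// spread over the first threads so chunk sizes differ by at most one.
static void split_work(size_t work_amount, size_t nthr, size_t ithr, size_t& start, size_t& end) {
    size_t chunk = work_amount / nthr;
    size_t rem = work_amount % nthr;
    start = ithr * chunk + (ithr < rem ? ithr : rem);
    end = start + chunk + (ithr < rem ? 1 : 0);
}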
(block - 1) : ((it + 1) * blockSize - 1 - oAdd[1]) / blockShape[1]; - for (size_t i5 = i5Begin; i5 < i5End + 1; ++i5) { - const int64_t tmpOc = i5 * blockShape[1] + addTmpOc; - const size_t srcIdx5 = srcIdx4 + i5; - const size_t dstIdx5 = dstIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize); - dstData[dstIdx5] = srcData[srcIdx5]; - } - } - } - } - } - } - indxStart[1] = 0lu; - } - }); - } - - SizeVector inDims; - SizeVector outDims; - std::vector blockShapeIn; - std::vector cropsBeginIn; -}; - -REG_FACTORY_FOR(BatchToSpaceImpl, BatchToSpace); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp b/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp deleted file mode 100644 index c4310e8ad02..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include "ie_parallel.hpp" -#include "common/cpu_memcpy.h" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class BroadcastImpl: public ExtLayerBase { -public: - explicit BroadcastImpl(const CNNLayer* layer) { - try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - - if (layer->insData.size() != 2) - IE_THROW() << layer->name << " Incorrect number of input edges!"; - - SizeVector shape_dims = layer->insData[BROADCAST_SHAPE].lock()->getTensorDesc().getDims(); - if (shape_dims.size() > 1) - IE_THROW() << layer->name << " Shape vector should be 1 dimension"; - - LayerConfig config; - DataConfig dataConfig, shapeConfig; - Precision dataPrecision = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getPrecision(); - const SizeVector& data_dims = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getDims(); - dataConfig.desc = TensorDesc(dataPrecision, data_dims, - layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getLayout()); - config.inConfs.push_back(dataConfig); - shapeConfig.desc = TensorDesc(layer->insData[BROADCAST_SHAPE].lock()->getTensorDesc().getPrecision(), - shape_dims, TensorDesc::getLayoutByDims(shape_dims)); - config.inConfs.push_back(shapeConfig); - - DataConfig outConfig; - const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims(); - outConfig.desc = TensorDesc(dataPrecision, out_dims, layer->outData[0]->getTensorDesc().getLayout()); - config.outConfs.push_back(outConfig); - config.dynBatchSupport = false; - confs.push_back(config); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - size_t shape_size = (inputs[BROADCAST_SHAPE]->getTensorDesc().getDims())[0]; - SizeVector dst_dims = outputs[0]->getTensorDesc().getDims(); - SizeVector src_dims = inputs[BROADCAST_INPUT]->getTensorDesc().getDims(); - SizeVector srcStrides = inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getStrides(); - size_t data_size = inputs[BROADCAST_INPUT]->getTensorDesc().getPrecision().size(); - - if (!src_dims.size()) - src_dims = SizeVector(1, 1); - if (!srcStrides.size()) - srcStrides = SizeVector(1, 1); - - if (dst_dims.size() != shape_size) { - if (resp) { - std::string errorMsg = "Output tensor dimension mismatch"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 
1); - } - return PARAMETER_MISMATCH; - } - - if (src_dims.size() > dst_dims.size()) { - if (resp) { - std::string errorMsg = "Output tensor dimension is smaller then input tensor dimension"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - - InferenceEngine::SizeVector dstStrides = outputs[0]->getTensorDesc().getBlockingDesc().getStrides(); - InferenceEngine::SizeVector src_aligned(dst_dims.size()); - InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); - size_t prefix_size = dst_dims.size() - src_dims.size(); - for (size_t i = 0; i < dst_dims.size(); i++) { - if (i < prefix_size) { - src_aligned[i] = 1; - srcStrides_aligned[i] = srcStrides[0]; - } else { - src_aligned[i] = src_dims[i - prefix_size]; - srcStrides_aligned[i] = srcStrides[i - prefix_size]; - } - } - - size_t work_amount_dst = dstStrides[0] * dst_dims[0]; - const uint8_t *src_data = inputs[BROADCAST_INPUT]->cbuffer().as() + - inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - uint8_t* dst_data = outputs[0]->cbuffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t i, src_idx, start = 0, end = 0; - SizeVector counters(dst_dims.size(), 0); - splitter(work_amount_dst, nthr, ithr, start, end); - for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) { - counters[j] = i % dst_dims[j]; - i /= dst_dims[j]; - } - for (size_t iwork = start * data_size; iwork < end * data_size; iwork += data_size) { - for (i = 0, src_idx = 0; i < dst_dims.size(); ++i) - src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0; - - cpu_memcpy(&dst_data[iwork], &src_data[src_idx * data_size], data_size); - - for (int j = dst_dims.size() - 1; j >= 0; j--) { - counters[j] = (counters[j] + 1) % dst_dims[j]; - if (counters[j] != 0) break; - } - } - }); - - return OK; - } - -private: - const size_t BROADCAST_INPUT = 0; - const size_t BROADCAST_SHAPE = 1; -}; - -REG_FACTORY_FOR(BroadcastImpl, Broadcast); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp index f9baeb4f479..22b34e123e6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp @@ -12,55 +12,72 @@ #include #include #include "ie_parallel.hpp" +#include + +using namespace MKLDNNPlugin; namespace InferenceEngine { namespace Extensions { namespace Cpu { class BucketizeImpl : public ExtLayerBase { -public: - explicit BucketizeImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 2 || layer->outData.size() != 1) { - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; + const auto bucketsize = std::dynamic_pointer_cast(op); + if (!bucketsize) { + errorMessage = "Only opset3 Bucketize operation is supported"; + return false; + } + } catch (...) 
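The removed BroadcastImpl above right-aligns the source shape and strides against the destination rank (missing leading dimensions act as size 1) and folds every destination coordinate back into the source with a per-dimension modulo. A standalone sketch of that index computation, with a small worked example:

#include <cstddef>
#include <vector>

// Map a destination coordinate onto the broadcast source: each coordinate is
// taken modulo the aligned source dimension before applying the source stride.
static size_t broadcast_src_index(const std::vector<size_t>& dst_coord,
                                  const std::vector<size_t>& src_aligned,
                                  const std::vector<size_t>& src_strides_aligned) {
    size_t src_idx = 0;
    for (size_t i = 0; i < dst_coord.size(); ++i)
        src_idx += (dst_coord[i] % src_aligned[i]) * src_strides_aligned[i];
    return src_idx;
}
// Example: broadcasting a {3, 1} tensor to {2, 3, 4} uses src_aligned = {1, 3, 1}
// and src_strides_aligned = {1, 1, 1}; destination coordinate {1, 2, 3} then maps
// to flat source element 2, i.e. row 2 of the {3, 1} input.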
{ + return false; + } + return true; + } + + std::string errorPrefix; + +public: + explicit BucketizeImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "Bucketize layer with name '" + op->get_friendly_name() + "' "; + const auto bucketsize = std::dynamic_pointer_cast(op); + + if (op->get_input_size() != 2 || op->get_output_size() != 1) { + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; } // check one attribute - with_right = layer->GetParamAsBool("with_right_bound"); - - auto input = layer->insData[INPUT_TENSOR_PORT].lock(); - if (!input) { - IE_THROW() << "Missing input for " << layer->name << " layer"; - } - auto boundaries = layer->insData[INPUT_BINS_PORT].lock(); - if (!boundaries) { - IE_THROW() << "Missing boundaries input for " << layer->name << " layer"; - } + with_right = bucketsize->get_with_right_bound(); // check precisions for input and output tensors - input_precision = input->getTensorDesc().getPrecision(); + input_precision = details::convertPrecision(op->get_input_element_type(INPUT_TENSOR_PORT)); if (input_precision != Precision::FP32 && input_precision != Precision::I32 && input_precision != Precision::I64) { input_precision = Precision::FP32; } - boundaries_precision = boundaries->getTensorDesc().getPrecision(); + boundaries_precision = details::convertPrecision(op->get_input_element_type(INPUT_BINS_PORT)); if (boundaries_precision != Precision::FP32 && boundaries_precision != Precision::I32 && boundaries_precision != Precision::I64) { boundaries_precision = Precision::FP32; } - output_precision = layer->outData[OUTPUT_TENSOR_PORT]->getTensorDesc().getPrecision(); + output_precision = details::convertPrecision(op->get_output_element_type(OUTPUT_TENSOR_PORT)); if (output_precision != Precision::I32 && output_precision != Precision::I64) { output_precision = Precision::I32; } // check dimensions of input tensors - SizeVector input_tensor_dims = input->getTensorDesc().getDims(); + SizeVector input_tensor_dims = op->get_input_shape(INPUT_TENSOR_PORT); if (input_tensor_dims.size() < 1) { - IE_THROW() << layer->name << " Incorrect dimensions of the input."; + IE_THROW() << errorPrefix << " has incorrect dimensions of the input."; } - SizeVector input_bin_dims = boundaries->getTensorDesc().getDims(); + SizeVector input_bin_dims = op->get_input_shape(INPUT_BINS_PORT); if (input_bin_dims.size() != 1) { - IE_THROW() << layer->name << " Incorrect dimensions of the boundaries tensor."; + IE_THROW() << errorPrefix << " has incorrect dimensions of the boundaries tensor."; } if (input_bin_dims[0] != 0) { with_bins = true; @@ -69,9 +86,9 @@ public: num_values = std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), 1, std::multiplies()); - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, input_precision), DataConfigurator(ConfLayout::PLN, boundaries_precision) }, - { DataConfigurator(ConfLayout::PLN, output_precision) }); + addConfig(op, {{TensorDescCreatorTypes::ncsp, input_precision}, + {TensorDescCreatorTypes::ncsp, boundaries_precision}}, + {{TensorDescCreatorTypes::ncsp, output_precision}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder.cpp index 294d4185672..0ba6ca7e960 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder.cpp 
+++ b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder.cpp @@ -4,45 +4,68 @@ #include "base.hpp" #include "ie_parallel.hpp" +#include +#include -#include #include +#include namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + class CTCGreedyDecoderImpl: public ExtLayerBase { public: - explicit CTCGreedyDecoderImpl(const CNNLayer* layer) : mergeRepeated_(true) { - std::string errPrefix = "CTCGreedyDecoder layer with name '" + layer->name + "' "; - if (layer->insData.size() != 2) - IE_THROW() << errPrefix << "has invalid number of input edges: " << layer->insData.size(); - if (layer->outData.size() != 1) - IE_THROW() << errPrefix << "has invalid number of outputs edges: " << layer->outData.size(); + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto greedyDecOp = ngraph::as_type_ptr(op); + if (!greedyDecOp) { + errorMessage = "Node is not an instance of the CTCGreedyDecoder operation from operation set v0."; + return false; + } + } catch (...) { + return false; + } - auto inData = layer->insData[DATA_INDEX].lock(); - auto sequenceLenData = layer->insData[SEQUENCE_LENGTH_INDEX].lock(); - if (!inData || !sequenceLenData) - IE_THROW() << errPrefix << "has nullable inputs."; - if (inData->getTensorDesc().getDims()[0] != sequenceLenData->getTensorDesc().getDims()[0] && - inData->getTensorDesc().getDims()[1] != sequenceLenData->getTensorDesc().getDims()[1]) - IE_THROW() << errPrefix << "has invalid input shapes."; - if (inData->getTensorDesc().getPrecision() != Precision::FP32 && - inData->getTensorDesc().getPrecision() != Precision::BF16) - IE_THROW() << errPrefix << "has unsupported 'data' input precision: " << inData->getTensorDesc().getPrecision(); - if (sequenceLenData->getTensorDesc().getPrecision() != Precision::FP32 && - inData->getTensorDesc().getPrecision() != Precision::BF16) - IE_THROW() << errPrefix << "has unsupported 'sequence_length' input precision: " << sequenceLenData->getTensorDesc().getPrecision(); + return true; + } - std::vector inputConfigs{{ConfLayout::PLN, Precision::FP32}, {ConfLayout::PLN, Precision::FP32}}; - std::vector outputConfigs{{ConfLayout::PLN, Precision::FP32}}; - addConfig(layer, inputConfigs, outputConfigs); + explicit CTCGreedyDecoderImpl(const std::shared_ptr& op) : mergeRepeated_(true) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - if (layer->CheckParamPresence("ctc_merge_repeated")) { - mergeRepeated_ = layer->GetParamAsBool("ctc_merge_repeated"); - } else if (layer->CheckParamPresence("merge_repeated")) { - mergeRepeated_ = layer->GetParamAsBool("merge_repeated", true); + std::string errPrefix = "CTCGreedyDecoder layer with name '" + op->get_friendly_name() + "' "; + if (op->get_input_size() != 2) + IE_THROW() << errPrefix << "has invalid number of input edges: " << op->get_input_size(); + if (op->get_output_size() != 1) + IE_THROW() << errPrefix << "has invalid number of outputs edges: " << op->get_output_size(); + + if (op->get_input_shape(DATA_INDEX)[0] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[0] && + op->get_input_shape(DATA_INDEX)[1] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[1]) + IE_THROW() << errPrefix << "has invalid input shapes."; + + Precision inDataPrecision = details::convertPrecision(op->get_input_element_type(DATA_INDEX)); + if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::BF16) + 
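The ported nodes in this and the following files follow one validation pattern: a noexcept isSupportedOperation() that checks the concrete ngraph op type, and a constructor that reports unsupported cases with IE_THROW(NotImplemented), which the reworked ImplFactory::getImplementations() in base.hpp converts back into a StatusCode instead of letting the exception escape. A skeleton of that pattern; SomeNodeImpl is a placeholder class name, and the Bucketize cast is only an example matching the node above.

class SomeNodeImpl : public ExtLayerBase {  // placeholder name, for illustration only
public:
    static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
        try {
            // cast to whichever opset type the node supports (Bucketize shown as an example)
            if (!ngraph::as_type_ptr<ngraph::opset3::Bucketize>(op)) {
                errorMessage = "Only opset3 Bucketize operation is supported";
                return false;
            }
        } catch (...) {
            return false;
        }
        return true;
    }

    explicit SomeNodeImpl(const std::shared_ptr<ngraph::Node>& op) {
        try {
            std::string errorMessage;
            if (!isSupportedOperation(op, errorMessage)) {
                IE_THROW(NotImplemented) << errorMessage;
            }
            // ... read attributes from the concrete op and declare ports via addConfig(...)
        } catch (InferenceEngine::Exception& ex) {
            errorMsg = ex.what();
            throw;  // ImplFactory::getImplementations() turns this back into a StatusCode
        }
    }
};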
IE_THROW() << errPrefix << "has unsupported 'data' input precision: " << inDataPrecision; + + Precision seqLenPrecision = details::convertPrecision(op->get_input_element_type(SEQUENCE_LENGTH_INDEX)); + if (seqLenPrecision != Precision::FP32 && seqLenPrecision != Precision::BF16) + IE_THROW() << errPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; + + auto greedyDecOp = ngraph::as_type_ptr(op); + mergeRepeated_ = greedyDecOp->get_ctc_merge_repeated(); + + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); + } catch (InferenceEngine::Exception &ex) { + errorMsg = ex.what(); + throw; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder_seq_len.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder_seq_len.cpp index 69a97b2483f..c60684ee0af 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder_seq_len.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy_decoder_seq_len.cpp @@ -4,51 +4,81 @@ #include "base.hpp" #include "ie_parallel.hpp" +#include +#include -#include #include +#include namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + class CTCGreedyDecoderSeqLenImpl: public ExtLayerBase { public: - explicit CTCGreedyDecoderSeqLenImpl(const CNNLayer* layer) : mergeRepeated_(true) { - errPrefix = "CTCGreedyDecoderSeqLen layer with name '" + layer->name + "' "; - if (layer->insData.size() < 2 || layer->insData.size() > 3) - IE_THROW() << errPrefix << "has invalid number of input edges: " << layer->insData.size(); - if (layer->outData.size() != 2) - IE_THROW() << errPrefix << "has invalid number of outputs edges: " << layer->outData.size(); - - auto inData = layer->insData[DATA_INDEX].lock(); - auto sequenceLenData = layer->insData[SEQUENCE_LENGTH_INDEX].lock(); - if (!inData || !sequenceLenData) - IE_THROW() << errPrefix << "has nullable inputs."; - if (inData->getTensorDesc().getDims()[0] != sequenceLenData->getTensorDesc().getDims()[0]) - IE_THROW() << errPrefix << "has invalid input shapes."; - if (inData->getTensorDesc().getPrecision() != Precision::FP32 && - inData->getTensorDesc().getPrecision() != Precision::BF16) - IE_THROW() << errPrefix << "has unsupported 'data' input precision: " << inData->getTensorDesc().getPrecision(); - if (sequenceLenData->getTensorDesc().getPrecision() != Precision::I32 && - sequenceLenData->getTensorDesc().getPrecision() != Precision::I64) - IE_THROW() << errPrefix << "has unsupported 'sequence_length' input precision: " << sequenceLenData->getTensorDesc().getPrecision(); - - std::vector inputConfigs{{ConfLayout::PLN, Precision::FP32}, {ConfLayout::PLN, Precision::I32}}; - - if (layer->insData.size() > BLANK_INDEX) { - auto blankIndexData = layer->insData[BLANK_INDEX].lock(); - if (!blankIndexData) - IE_THROW() << errPrefix << "has nullable inputs."; - if (blankIndexData->getTensorDesc().getPrecision() != Precision::I32 && - blankIndexData->getTensorDesc().getPrecision() != Precision::I64) - IE_THROW() << errPrefix << "has unsupported 'blank_index' input precision: " << blankIndexData->getTensorDesc().getPrecision(); - inputConfigs.push_back({ConfLayout::PLN, Precision::I32}); + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto greedyDecOp = ngraph::as_type_ptr(op); + if (!greedyDecOp) { + errorMessage = "Node is not an 
instance of the CTCGreedyDecoderSeqLen operation from operation set v6."; + return false; + } + } catch (...) { + return false; } - std::vector outputConfigs{{ConfLayout::PLN, Precision::I32}, {ConfLayout::PLN, Precision::I32}}; - addConfig(layer, inputConfigs, outputConfigs); - mergeRepeated_ = layer->GetParamAsBool("merge_repeated", true); + return true; + } + + explicit CTCGreedyDecoderSeqLenImpl(const std::shared_ptr& op) : mergeRepeated_(true) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + std::string errPrefix = "CTCGreedyDecoderSeqLen layer with name '" + op->get_friendly_name() + "' "; + if (op->get_input_size() < 2 || op->get_input_size() > 3) + IE_THROW() << errPrefix << "has invalid number of input edges: " << op->get_input_size(); + if (op->get_output_size() != 2) + IE_THROW() << errPrefix << "has invalid number of outputs edges: " << op->get_output_size(); + + if (op->get_input_shape(DATA_INDEX)[0] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[0]) + IE_THROW() << errPrefix << "has invalid input shapes."; + + Precision inDataPrecision = details::convertPrecision(op->get_input_element_type(DATA_INDEX)); + if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::BF16) + IE_THROW() << errPrefix << "has unsupported 'data' input precision: " << inDataPrecision; + + Precision seqLenPrecision = details::convertPrecision(op->get_input_element_type(SEQUENCE_LENGTH_INDEX)); + if (seqLenPrecision != Precision::I32 && seqLenPrecision != Precision::I64) + IE_THROW() << errPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; + + auto greedyDecOp = ngraph::as_type_ptr(op); + mergeRepeated_ = greedyDecOp->get_merge_repeated(); + + if (op->get_input_size() == BLANK_INDEX) { + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}); + } else { + Precision blIdxPrecision = details::convertPrecision(op->get_input_element_type(BLANK_INDEX)); + if (blIdxPrecision != Precision::I32 && blIdxPrecision != Precision::I64) + IE_THROW() << errPrefix << "has unsupported 'blank_index' input precision: " << blIdxPrecision; + + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}); + } + } catch (InferenceEngine::Exception &ex) { + errorMsg = ex.what(); + throw; + } } StatusCode execute(std::vector& inputs, std::vector& outputs, diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_loss.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_loss.cpp index 52fd41816c6..84d6b55a1a4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/ctc_loss.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/ctc_loss.cpp @@ -4,6 +4,8 @@ #include "base.hpp" #include "ie_parallel.hpp" +#include +#include #include @@ -12,46 +14,52 @@ namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + class CTCLossImpl : public ExtLayerBase { public: - explicit CTCLossImpl(const CNNLayer* layer) { - _logPrefix = std::string("CTCLoss layer with name '") + layer->name + "'"; - - if (layer->insData.size() != 4 && layer->insData.size() != 5) - IE_THROW() << _logPrefix << " has invalid 
inputs number."; - - _ctcMergeRepeated = layer->GetParamAsBool("ctc_merge_repeated", true); - _preprocessCollapseRepeated = layer->GetParamAsBool("preprocess_collapse_repeated", false); - _unique = layer->GetParamAsBool("unique", false); - - auto logitsData = layer->insData[0].lock(); - if (logitsData == nullptr) - IE_THROW() << _logPrefix << " has nullable logits data"; - - LayerConfig config; - config.inConfs.resize(layer->insData.size()); - config.inConfs[0].desc = TensorDesc(Precision::FP32, - logitsData->getTensorDesc().getDims(), - TensorDesc::getLayoutByDims(logitsData->getTensorDesc().getDims())); - auto intPrecision = Precision::I32; - for (int i = 1; i < layer->insData.size(); i++) { - auto data = layer->insData[i].lock(); - if (data == nullptr) - IE_THROW() << _logPrefix << " has nullable input data at " << i; - config.inConfs[i].desc = TensorDesc(intPrecision, - data->getTensorDesc().getDims(), - TensorDesc::getLayoutByDims(data->getTensorDesc().getDims())); + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto ctcLossOp = ngraph::as_type_ptr(op); + if (!ctcLossOp) { + errorMessage = "Node is not an instance of the CTCLoss operation from operation set v4."; + return false; + } + } catch (...) { + return false; } - DataConfig outConfig; - auto& outDims = layer->outData[0]->getTensorDesc().getDims(); - outConfig.desc = TensorDesc(Precision::FP32, - outDims, - TensorDesc::getLayoutByDims(outDims)); - config.outConfs.push_back(outConfig); - config.dynBatchSupport = false; + return true; + } - confs.push_back(config); + explicit CTCLossImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + _logPrefix = std::string("CTCLoss layer with name '") + op->get_friendly_name() + "'"; + + if (op->get_input_size() != 4 && op->get_input_size() != 5) + IE_THROW() << _logPrefix << " has invalid inputs number."; + + auto ctcLossOp = ngraph::as_type_ptr(op); + _ctcMergeRepeated = ctcLossOp->get_ctc_merge_repeated(); + _preprocessCollapseRepeated = ctcLossOp->get_preprocess_collapse_repeated(); + _unique = ctcLossOp->get_unique(); + + std::vector inDataConfigurators; + inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::FP32}); + for (int i = 1; i < op->get_input_size(); i++) { + inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + } + addConfig(op, inDataConfigurators, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); + } catch (InferenceEngine::Exception &ex) { + errorMsg = ex.what(); + throw; + } } StatusCode execute(std::vector& inputs, diff --git a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp index e2348212f83..2ed69db46b1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp @@ -9,11 +9,16 @@ #include #include "ie_parallel.hpp" #include "ie_precision.hpp" +#include +#include +#include namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + class CumSumImpl: public ExtLayerBase { enum { CUM_SUM_DATA, AXIS, numOfInputs }; bool exclusive; @@ -22,71 +27,67 @@ class CumSumImpl: public ExtLayerBase { size_t axis = 0; std::vector shape; -public: - explicit CumSumImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try 
{ - layerName = layer->name; - if ((layer->insData.size() != numOfInputs && layer->insData.size() != (numOfInputs - 1)) || layer->outData.size() != 1) + const auto cumsum = std::dynamic_pointer_cast(op); + if (!cumsum) { + errorMessage = "Only opset3 CumSum operation is supported"; + return false; + } + } catch (...) { + return false; + } + return true; + } + +public: + explicit CumSumImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + layerName = op->get_friendly_name(); + if ((op->get_input_size() != numOfInputs && op->get_input_size() != (numOfInputs - 1)) || op->get_output_size() != 1) IE_THROW() << "CumSum layer with name '" << layerName << "' has incorrect number of input/output edges!"; - const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc(); - const auto &dataShape = dataTensor.getDims(); + const auto &dataShape = op->get_input_shape(CUM_SUM_DATA); if (dataShape.size() < 1) { IE_THROW() << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size(); } numOfDims = dataShape.size(); - exclusive = layer->GetParamAsBool("exclusive", false); - reverse = layer->GetParamAsBool("reverse", false); + const auto cumsum = std::dynamic_pointer_cast(op); + exclusive = cumsum->is_exclusive(); + reverse = cumsum->is_reverse(); - const auto& dataPrecision = dataTensor.getPrecision(); + auto dataPrecision = details::convertPrecision(cumsum->get_input_element_type(CUM_SUM_DATA)); if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 && dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16) IE_THROW() << "CumSum layer with name '" << layerName << "' has unsupported 'data' input precision: " << dataPrecision.name(); - if (layer->insData.size() == numOfInputs) { - const auto& axisTensor = layer->insData[AXIS].lock()->getTensorDesc(); - const auto& axisTensorPrec = layer->insData[AXIS].lock()->getTensorDesc().getPrecision(); + if (cumsum->get_input_size() == numOfInputs) { + const auto& axisTensorPrec = details::convertPrecision(cumsum->get_input_element_type(AXIS)); if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64) IE_THROW() << "CumSum layer with name '" << layerName << "' has unsupported 'axis' input precision: " << axisTensorPrec.name(); - const auto axisTensorRank = axisTensor.getDims().size(); - if (axisTensorRank != 0) - IE_THROW() << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with rank: " << axisTensorRank; + if (!ngraph::is_scalar(cumsum->get_input_shape(AXIS))) + IE_THROW() << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with non scalar rank"; } - if (dataShape != layer->outData[0]->getTensorDesc().getDims()) + if (dataShape != cumsum->get_output_shape(0)) IE_THROW() << "CumSum layer with name '" << layerName << "' has different 'data' input and output dimensions"; shape = dataShape; - LayerConfig config; - for (size_t i = 0; i < layer->insData.size(); i++) { - DataConfig inConfig; - inConfig.inPlace = -1; - inConfig.constant = false; - - Precision inPrecision = i == 1 ? 
Precision(Precision::I32) : layer->insData[i].lock()->getTensorDesc().getPrecision(); - if (inPrecision == Precision::BF16) - inPrecision = Precision::FP32; - const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims(); - inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims)); - - config.inConfs.push_back(inConfig); - } - DataConfig outConfig; - outConfig.inPlace = -1; - outConfig.constant = false; - Precision outPrecision = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc().getPrecision(); - if (outPrecision == Precision::BF16) - outPrecision = Precision::FP32; - const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims(); - outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims)); - - config.outConfs.push_back(outConfig); - - config.dynBatchSupport = false; - confs.push_back(config); + std::vector inDataConfigurators; + if (dataPrecision == Precision::BF16) + dataPrecision = Precision::FP32; + inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, dataPrecision}); + if (op->get_input_size() > 1) + inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + addConfig(op, inDataConfigurators, {{TensorDescCreatorTypes::ncsp, dataPrecision}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp index 780bceb8770..bd3b1da8fc8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp @@ -10,12 +10,17 @@ #include #include #include +#include "caseless.hpp" #include "ie_parallel.hpp" +#include "common/tensor_desc_creator.h" +#include namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + template static bool SortScorePairDescend(const std::pair& pair1, const std::pair& pair2) { @@ -24,98 +29,95 @@ static bool SortScorePairDescend(const std::pair& pair1, class DetectionOutputImpl: public ExtLayerBase { public: - explicit DetectionOutputImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 3 && layer->insData.size() != 5) - IE_THROW() << "Incorrect number of input edges for layer " << layer->name; - if (layer->outData.empty()) - IE_THROW() << "Incorrect number of output edges for layer " << layer->name; + auto doOp = ngraph::as_type_ptr(op); + if (!doOp) { + errorMessage = "Node is not an instance of the DetectionOutput from the operations set v0."; + return false; + } + if (!details::CaselessEq()(doOp->get_attrs().code_type, "caffe.PriorBoxParameter.CENTER_SIZE") && + !details::CaselessEq()(doOp->get_attrs().code_type, "caffe.PriorBoxParameter.CORNER")) { + errorMessage = "Unsupported code_type attribute."; + return false; + } + } catch (...) 
{ + return false; + } + return true; + } - _num_classes = layer->GetParamAsInt("num_classes"); - _background_label_id = layer->GetParamAsInt("background_label_id", 0); - _top_k = layer->GetParamAsInt("top_k", -1); - _variance_encoded_in_target = layer->GetParamAsBool("variance_encoded_in_target", false); - _keep_top_k = layer->GetParamAsInt("keep_top_k", -1); - _nms_threshold = layer->GetParamAsFloat("nms_threshold"); - _confidence_threshold = layer->GetParamAsFloat("confidence_threshold", -FLT_MAX); - _share_location = layer->GetParamAsBool("share_location", true); - _clip_before_nms = layer->GetParamAsBool("clip_before_nms", false) || - layer->GetParamAsBool("clip", false); // for backward compatibility - _clip_after_nms = layer->GetParamAsBool("clip_after_nms", false); - _decrease_label_id = layer->GetParamAsBool("decrease_label_id", false); - _normalized = layer->GetParamAsBool("normalized", true); - _image_height = layer->GetParamAsInt("input_height", 1); - _image_width = layer->GetParamAsInt("input_width", 1); + explicit DetectionOutputImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + if (op->get_input_size() != 3 && op->get_input_size() != 5) + IE_THROW() << "Invalid number of input edges."; + + if (op->get_output_size() != 1) + IE_THROW() << "Invalid number of output edges."; + + auto doOp = ngraph::as_type_ptr(op); + auto attributes = doOp->get_attrs(); + + _num_classes = attributes.num_classes; + _background_label_id = attributes.background_label_id; + _top_k = attributes.top_k; + _variance_encoded_in_target = attributes.variance_encoded_in_target; + _keep_top_k = attributes.keep_top_k[0]; + _nms_threshold = attributes.nms_threshold; + _confidence_threshold = attributes.confidence_threshold; + _share_location = attributes.share_location; + _clip_before_nms = attributes.clip_before_nms; + _clip_after_nms = attributes.clip_after_nms; + _decrease_label_id = attributes.decrease_label_id; + _normalized = attributes.normalized; + _image_height = attributes.input_height; + _image_width = attributes.input_width; _prior_size = _normalized ? 4 : 5; _offset = _normalized ? 0 : 1; _num_loc_classes = _share_location ? 1 : _num_classes; - with_add_box_pred = layer->insData.size() == 5; - _objectness_score = layer->GetParamAsFloat("objectness_score", 0.0f); + with_add_box_pred = op->get_input_size() == 5; + _objectness_score = attributes.objectness_score; - std::string code_type_str = layer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER"); - _code_type = (code_type_str == "caffe.PriorBoxParameter.CENTER_SIZE" ? CodeType::CENTER_SIZE - : CodeType::CORNER); + _code_type = (details::CaselessEq()(attributes.code_type, "caffe.PriorBoxParameter.CENTER_SIZE") ? 
+ CodeType::CENTER_SIZE : CodeType::CORNER); - _num_priors = static_cast(layer->insData[idx_priors].lock()->getDims().back() / _prior_size); - _priors_batches = layer->insData[idx_priors].lock()->getDims().front() != 1; + _num_priors = static_cast(op->get_input_shape(idx_priors).back() / _prior_size); + _priors_batches = op->get_input_shape(idx_priors).front() != 1; - if (_num_priors * _num_loc_classes * 4 != static_cast(layer->insData[idx_location].lock()->getDims()[1])) + if (_num_priors * _num_loc_classes * 4 != static_cast(op->get_input_shape(idx_location)[1])) IE_THROW() << "Number of priors must match number of location predictions (" << _num_priors * _num_loc_classes * 4 << " vs " - << layer->insData[idx_location].lock()->getDims()[1] << ")"; + << op->get_input_shape(idx_location)[1] << ")"; - if (_num_priors * _num_classes != static_cast(layer->insData[idx_confidence].lock()->getTensorDesc().getDims().back())) + if (_num_priors * _num_classes != static_cast(op->get_input_shape(idx_confidence).back())) IE_THROW() << "Number of priors must match number of confidence predictions."; if (_decrease_label_id && _background_label_id != 0) IE_THROW() << "Cannot use decrease_label_id and background_label_id parameter simultaneously."; - _num = static_cast(layer->insData[idx_confidence].lock()->getTensorDesc().getDims()[0]); + _num = static_cast(op->get_input_shape(idx_confidence)[0]); - InferenceEngine::SizeVector bboxes_size{static_cast(_num), - static_cast(_num_classes), - static_cast(_num_priors), - 4}; - _decoded_bboxes = InferenceEngine::make_shared_blob({Precision::FP32, bboxes_size, NCHW}); - _decoded_bboxes->allocate(); + _decoded_bboxes.resize(_num * _num_classes * _num_priors * 4); + _buffer.resize(_num * _num_classes * _num_priors); + _indices.resize(_num * _num_classes * _num_priors); + _detections_count.resize(_num * _num_classes); + _bbox_sizes.resize(_num * _num_classes * _num_priors); + _num_priors_actual.resize(_num); - InferenceEngine::SizeVector buf_size{static_cast(_num), - static_cast(_num_classes), - static_cast(_num_priors)}; - _buffer = InferenceEngine::make_shared_blob({Precision::I32, buf_size, {buf_size, {0, 1, 2}}}); - _buffer->allocate(); + const auto &confSize = op->get_input_shape(idx_confidence); + _reordered_conf.resize(std::accumulate(confSize.begin(), confSize.end(), 1, std::multiplies())); - InferenceEngine::SizeVector indices_size{static_cast(_num), - static_cast(_num_classes), - static_cast(_num_priors)}; - _indices = InferenceEngine::make_shared_blob( - {Precision::I32, indices_size, {indices_size, {0, 1, 2}}}); - _indices->allocate(); - - InferenceEngine::SizeVector detections_size{static_cast((size_t)(_num) * _num_classes)}; - _detections_count = InferenceEngine::make_shared_blob({Precision::I32, detections_size, C}); - _detections_count->allocate(); - - const InferenceEngine::SizeVector &conf_size = layer->insData[idx_confidence].lock()->getTensorDesc().getDims(); - _reordered_conf = InferenceEngine::make_shared_blob({Precision::FP32, conf_size, ANY}); - _reordered_conf->allocate(); - - InferenceEngine::SizeVector decoded_bboxes_size{static_cast(_num), - static_cast(_num_priors), - static_cast(_num_classes)}; - _bbox_sizes = InferenceEngine::make_shared_blob( - {Precision::FP32, decoded_bboxes_size, {decoded_bboxes_size, {0, 1, 2}}}); - _bbox_sizes->allocate(); - - InferenceEngine::SizeVector num_priors_actual_size{static_cast(_num)}; - _num_priors_actual = InferenceEngine::make_shared_blob({Precision::I32, num_priors_actual_size, C}); - 
_num_priors_actual->allocate(); - - std::vector in_data_conf(layer->insData.size(), DataConfigurator(ConfLayout::PLN, Precision::FP32)); - addConfig(layer, in_data_conf, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}); + std::vector inDataConfigurators(op->get_input_size(), {TensorDescCreatorTypes::ncsp, Precision::FP32}); + addConfig(op, inDataConfigurators, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } @@ -131,13 +133,13 @@ public: const int N = inputs[idx_confidence]->getTensorDesc().getDims()[0]; - float *decoded_bboxes_data = _decoded_bboxes->buffer().as(); - float *reordered_conf_data = _reordered_conf->buffer().as(); - float *bbox_sizes_data = _bbox_sizes->buffer().as(); - int *detections_data = _detections_count->buffer().as(); - int *buffer_data = _buffer->buffer().as(); - int *indices_data = _indices->buffer().as(); - int *num_priors_actual = _num_priors_actual->buffer().as(); + float *decoded_bboxes_data = _decoded_bboxes.data(); + float *reordered_conf_data = _reordered_conf.data(); + float *bbox_sizes_data = _bbox_sizes.data(); + int *detections_data = _detections_count.data(); + int *buffer_data = _buffer.data(); + int *indices_data = _indices.data(); + int *num_priors_actual = _num_priors_actual.data(); for (int n = 0; n < N; ++n) { const float *ppriors = prior_data; @@ -396,13 +398,13 @@ private: void nms_mx(const float *conf_data, const float *bboxes, const float *sizes, int *buffer, int *indices, int *detections, int num_priors_actual); - InferenceEngine::Blob::Ptr _decoded_bboxes; - InferenceEngine::Blob::Ptr _buffer; - InferenceEngine::Blob::Ptr _indices; - InferenceEngine::Blob::Ptr _detections_count; - InferenceEngine::Blob::Ptr _reordered_conf; - InferenceEngine::Blob::Ptr _bbox_sizes; - InferenceEngine::Blob::Ptr _num_priors_actual; + std::vector _decoded_bboxes; + std::vector _buffer; + std::vector _indices; + std::vector _detections_count; + std::vector _reordered_conf; + std::vector _bbox_sizes; + std::vector _num_priors_actual; }; struct ConfidenceComparator { diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp index 1b96434b2b9..fefcee872ce 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp @@ -12,6 +12,8 @@ #include #include #include "ie_parallel.hpp" +#include "common/tensor_desc_creator.h" +#include namespace { @@ -44,6 +46,8 @@ namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + static void refine_boxes(const float* boxes, const float* deltas, const float* weights, const float* scores, float* refined_boxes, float* refined_boxes_areas, float* refined_scores, @@ -235,46 +239,46 @@ private: const int OUTPUT_SCORES {2}; public: - explicit ExperimentalDetectronDetectionOutputImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - score_threshold_ = layer->GetParamAsFloat("score_threshold"); - nms_threshold_ = layer->GetParamAsFloat("nms_threshold"); - max_delta_log_wh_ = layer->GetParamAsFloat("max_delta_log_wh"); - classes_num_ = layer->GetParamAsInt("num_classes"); - max_detections_per_class_ = layer->GetParamAsInt("post_nms_count"); - max_detections_per_image_ = layer->GetParamAsInt("max_detections_per_image"); - class_agnostic_box_regression_ 
= layer->GetParamAsBool("class_agnostic_box_regression", false); - deltas_weights_ = layer->GetParamAsFloats("deltas_weights"); - - - LayerConfig config; - for (auto in : layer->insData) { - auto in_ = in.lock(); - auto dims = in_->getTensorDesc().getDims(); - DataConfig data; - data.desc = TensorDesc(Precision::FP32, dims, in_->getTensorDesc().getLayoutByDims(dims)); - config.inConfs.push_back(data); + auto doOp = ngraph::as_type_ptr(op); + if (!doOp) { + errorMessage = "Node is not an instance of the ExperimentalDetectronDetectionOutput from the operations set v6."; + return false; } + } catch (...) { + return false; + } + return true; + } - auto dimsB = layer->outData[OUTPUT_BOXES]->getTensorDesc().getDims(); - DataConfig dataB; - dataB.desc = TensorDesc(Precision::FP32, dimsB, - layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsB)); - config.outConfs.push_back(dataB); - auto dimsC = layer->outData[OUTPUT_CLASSES]->getTensorDesc().getDims(); - DataConfig dataC; - dataC.desc = TensorDesc(Precision::I32, dimsC, - layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsC)); - config.outConfs.push_back(dataC); - auto dimsS = layer->outData[OUTPUT_SCORES]->getTensorDesc().getDims(); - DataConfig dataS; - dataS.desc = TensorDesc(Precision::FP32, dimsS, - layer->outData[OUTPUT_BOXES]->getTensorDesc().getLayoutByDims(dimsS)); - config.outConfs.push_back(dataS); - config.dynBatchSupport = false; - confs.push_back(config); + explicit ExperimentalDetectronDetectionOutputImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + auto doOp = ngraph::as_type_ptr(op); + auto attributes = doOp->get_attrs(); + + score_threshold_ = attributes.score_threshold; + nms_threshold_ = attributes.nms_threshold; + max_delta_log_wh_ = attributes.max_delta_log_wh; + classes_num_ = attributes.num_classes; + max_detections_per_class_ = attributes.post_nms_count; + max_detections_per_image_ = attributes.max_detections_per_image; + class_agnostic_box_regression_ = attributes.class_agnostic_box_regression; + deltas_weights_ = attributes.deltas_weights; + + std::vector inDataConfigurators(op->get_input_size(), {TensorDescCreatorTypes::ncsp, Precision::FP32}); + + addConfig(op, inDataConfigurators, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_offset_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_offset_sum.cpp deleted file mode 100644 index 90b4dcd4d27..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_offset_sum.cpp +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "embedding_bag_sum.hpp" -#include "ie_parallel.hpp" - -#include - - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class EmbeddingBagOffsetsSumImpl: public MKLDNNEmbeddingBagSum { -public: - explicit EmbeddingBagOffsetsSumImpl(const CNNLayer* layer) : - MKLDNNEmbeddingBagSum(layer, 3lu, 1lu, 4lu, 3lu) { - auto indicesData = layer->insData[INDICES_IDX].lock(); - if (indicesData == nullptr) - IE_THROW() << "'" << layer->name << "' layer has nullable indices data."; - if (indicesData->getTensorDesc().getDims().size() != 1) - 
IE_THROW() << "'" << layer->name << "' layer has indices data with invalid shape."; - - auto offsetsData = layer->insData[OFFSETS_IDX].lock(); - if (offsetsData == nullptr) - IE_THROW() << "'" << layer->name << "' layer has invalid offsets data."; - if (offsetsData->getTensorDesc().getDims().size() != 1) - IE_THROW() << "'" << layer->name << "' layer's offsets data has invalid shape."; - - _indicesLen = indicesData->getTensorDesc().getDims()[0]; - _offsetsLen = offsetsData->getTensorDesc().getDims()[0]; - } - - StatusCode execute( - std::vector& inputs, - std::vector& outputs, - ResponseDesc* resp) noexcept override { - switch (inputs[0]->getTensorDesc().getPrecision()) { - case Precision::FP32: { - return processData::value_type>(inputs, outputs, resp); - } - case Precision::I8: { - return processData::value_type>(inputs, outputs, resp); - } - case Precision::U8: { - return processData::value_type>(inputs, outputs, resp); - } - case Precision::I32: { - return processData::value_type>(inputs, outputs, resp); - } - default: { - if (resp) { - std::string errorMsg = "EmbeddingBagSum layer does not support embedding table precision '" - + std::string(inputs[0]->getTensorDesc().getPrecision().name()) + "'"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - } - } - -protected: - template - StatusCode processData( - std::vector& inputs, - std::vector& outputs, - ResponseDesc* resp) noexcept { - switch (inputs[1]->getTensorDesc().getPrecision()) { - case Precision::I32: { - return processData::value_type>(inputs, outputs, resp); - } - case Precision::I64: { - return processData::value_type>(inputs, outputs, resp); - } - case Precision::U64: { - return processData::value_type>(inputs, outputs, resp); - } - default: { - if (resp) { - std::string errorMsg = "EmbeddingBagSum layer does not support indices precision '" - + std::string(inputs[1]->getTensorDesc().getPrecision().name()) + "'"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - } - } - - template - StatusCode processData( - std::vector& inputs, - std::vector& outputs, - ResponseDesc* resp) noexcept { - std::string errorMsg; - std::string msgPrefix = std::string("Layer EmbeddingBagOffsetsSum with name '") + _layerName + "' "; - - const T* srcData = inputs[0]->cbuffer().as() + - inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - T* dstData = outputs[0]->buffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - const I* indicesData = inputs[INDICES_IDX]->cbuffer().as(); - - const I* offsetsData = inputs[OFFSETS_IDX]->cbuffer().as(); - int64_t defaultIndex = -1; - if (inputs.size() > DEFAULT_INDEX_IDX) { - defaultIndex = (int64_t)inputs[DEFAULT_INDEX_IDX]->cbuffer().as()[0]; - if (defaultIndex < 0 || defaultIndex >= _indicesLen) { - std::string msg = "Invalid default index: " + std::to_string(defaultIndex); - msg.copy(resp->msg, sizeof(resp->msg) - 1); - return GENERAL_ERROR; - } - } - const T* weightsData = nullptr; - if (_withWeights) - weightsData = inputs[PER_SAMPLE_WEIGHTS_IDX]->cbuffer().as(); - - const auto& inDataDims = inputs[0]->getTensorDesc().getDims(); - - const size_t OUTPUT_BAGS_NUM = outputs[0]->getTensorDesc().getDims()[0]; - - std::function get_idx = - [&](size_t embIndex, const I*& indicesRef, size_t& outSize, size_t& weightsIdx, bool& withWeights) { - if (embIndex >= _offsetsLen) { - errorMsg = msgPrefix + "has invalid embedding bag index."; - return; - } - if (offsetsData[embIndex] >= _indicesLen) { - errorMsg 
= msgPrefix + ". Offset value exceeds indices size in the model.\noffset: " - + std::to_string(offsetsData[embIndex]) + "; indices size: " + std::to_string(_indicesLen); - return; - } - - indicesRef = nullptr; - outSize = 0lu; - withWeights = _withWeights; - - if (embIndex == _offsetsLen - 1lu) - outSize = _indicesLen - offsetsData[embIndex]; - else - outSize = offsetsData[embIndex + 1lu] - offsetsData[embIndex]; - - if (outSize != 0lu) { - indicesRef = indicesData + offsetsData[embIndex]; - } else { - // Empty or default bag - withWeights = false; - if (defaultIndex >= 0) { - indicesRef = reinterpret_cast(&defaultIndex); - outSize = 1lu; - } - return; - } - - if (withWeights) - weightsIdx = offsetsData[embIndex]; - }; - - auto threadBody = [&](const int ithr, const int nthr) { - size_t start(0lu), end(0lu); - splitter(OUTPUT_BAGS_NUM, nthr, ithr, start, end); - if (start >= end) - return; - - size_t indicesSize = 0lu; - const I* indices = nullptr; - size_t weightsIdx = 0lu; - bool withWeights = _withWeights; - - for (size_t obi = start; obi < end; obi++) { - size_t dstIndex = obi * _embDepth; - get_idx(obi, indices, indicesSize, weightsIdx, withWeights); - if (indices != nullptr) { - withWeights = withWeights & _withWeights; - - size_t inIdx = 0lu; - if (indices[inIdx] >= inDataDims[0]) { - errorMsg = msgPrefix + "has invalid embedding bag index: " + std::to_string(indices[inIdx]); - return; - } - size_t srcIndex = indices[inIdx] * _embDepth; - - if (withWeights) { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] = srcData[srcIndex + i] * weightsData[weightsIdx]; - } - weightsIdx++; - } else { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] = srcData[srcIndex + i]; - } - } - - for (inIdx = 1lu; inIdx < indicesSize; inIdx++) { - if (indices[inIdx] >= inDataDims[0]) { - errorMsg = msgPrefix + "has invalid embedding bag index: " + std::to_string(indices[inIdx]); - return; - } - size_t srcIndex = indices[inIdx] * _embDepth; - - if (withWeights) { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] += srcData[srcIndex + i] * weightsData[weightsIdx]; - } - weightsIdx++; - } else { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] += srcData[srcIndex + i]; - } - } - } - } else { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] = 0; - } - } - } - }; - - parallel_nt(0, threadBody); - - if (!errorMsg.empty()) { - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - return GENERAL_ERROR; - } - - return OK; - } - - void initFromInputs(std::vector& inputs) override { - } - - void getIndices(size_t embIndex, const size_t*& indices, size_t& size, size_t& weightsIdx, bool& withWeights) override { - } - - const size_t OFFSETS_IDX = 2lu; - - size_t _indicesLen; - size_t _offsetsLen; -}; - -REG_FACTORY_FOR(EmbeddingBagOffsetsSumImpl, EmbeddingBagOffsetsSum); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_packed_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_packed_sum.cpp deleted file mode 100644 index bcf6b29a237..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_packed_sum.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "embedding_bag_sum.hpp" -#include "common/cpu_memcpy.h" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class EmbeddingBagPackedSumImpl: 
public MKLDNNEmbeddingBagSum { -public: - explicit EmbeddingBagPackedSumImpl(const CNNLayer* layer) : - MKLDNNEmbeddingBagSum(layer, 2lu, 1lu, 2lu, 3lu) { - auto indicesData = layer->insData[INDICES_IDX].lock(); - if (indicesData == nullptr) - IE_THROW() << "'" << layer->name << "' layer has nullable indices data."; - if (indicesData->getTensorDesc().getDims().size() != 2) - IE_THROW() << "'" << layer->name << "' layer has indices data with invalid shape."; - - _indices = std::vector>( - indicesData->getTensorDesc().getDims()[0], - std::vector(indicesData->getTensorDesc().getDims()[1], 0lu)); - } - - void initFromInputs(std::vector& inputs) override { - // Initialize indices - const size_t bagsNum = inputs[INDICES_IDX]->getTensorDesc().getDims()[0]; - const size_t batch = inputs[INDICES_IDX]->getTensorDesc().getDims()[1]; - if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) { - const INT32* src = inputs[INDICES_IDX]->cbuffer().as(); - for (size_t i = 0lu; i < bagsNum; i++) { - size_t ibn = i * batch; - for (size_t j = 0lu; j < batch; j++) { - _indices[i][j] = static_cast(src[ibn + j]); - } - } - } else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) { - const UINT64* src = inputs[INDICES_IDX]->cbuffer().as(); - for (size_t i = 0lu; i < bagsNum; i++) { - cpu_memcpy(_indices[i].data(), src + i * batch, batch * sizeof(UINT64)); - } - } - } - - void getIndices(size_t embIndex, const size_t*& indices, size_t& size, size_t& weightsIdx, bool& withWeights) override { - if (embIndex >= _indices.size()) - IE_THROW() << "Invalid embedding bag index."; - - withWeights = true; - - indices = _indices[embIndex].data(); - size = _indices[0].size(); - - weightsIdx = embIndex * _indices[0].size(); - } - -protected: - std::vector> _indices; -}; - -REG_FACTORY_FOR(EmbeddingBagPackedSumImpl, EmbeddingBagPackedSum); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_sum.cpp deleted file mode 100644 index e3fa7f2047f..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_sum.cpp +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "embedding_bag_sum.hpp" -#include "ie_parallel.hpp" -#include "list.hpp" - -#include -#include -#include - -using namespace InferenceEngine; -using namespace InferenceEngine::Extensions::Cpu; - - -const std::set MKLDNNEmbeddingBagSum::_supportedIndicesTypeSize = {sizeof(INT32), sizeof(INT64)}; - -MKLDNNEmbeddingBagSum::MKLDNNEmbeddingBagSum( - const CNNLayer* layer, - size_t requiredInputNum, - size_t indicesIdx, - size_t perSampleWeightsIdx, - size_t defaultIndexIdx, - const std::set& supportedPrecisions) : - INDICES_IDX(indicesIdx), - PER_SAMPLE_WEIGHTS_IDX(perSampleWeightsIdx), - DEFAULT_INDEX_IDX(defaultIndexIdx) { - try { - std::string logPrefix = std::string("Layer EmbeddingBagSum with name '") + layer->name + "' "; - if (layer->insData.size() < requiredInputNum || layer->outData.size() != 1) - IE_THROW() << logPrefix << "has incorrect number of input or output edges!"; - _layerName = layer->name; - - auto inData = layer->insData[0].lock(); - auto indicesData = layer->insData[INDICES_IDX].lock(); - if (inData == nullptr || indicesData == nullptr) - IE_THROW() << logPrefix << "has nullable input data."; - - auto dataPrecision = 
inData->getTensorDesc().getPrecision(); - if (dataPrecision == Precision::BF16) - dataPrecision = Precision::FP32; - if (!supportedPrecisions.empty()) { - if (supportedPrecisions.find(dataPrecision) == supportedPrecisions.end()) - IE_THROW() << logPrefix << "has unsupported precision: " << dataPrecision.name(); - } else { - static const std::set defaultSupportedPrecisions = - {Precision::FP32, Precision::I8, Precision::U8, Precision::I32}; - if (defaultSupportedPrecisions.find(dataPrecision) == defaultSupportedPrecisions.end()) - IE_THROW() << logPrefix << "has unsupported precision: " << dataPrecision.name(); - } - - if (layer->insData.size() > PER_SAMPLE_WEIGHTS_IDX) - _withWeights = true; - if (_withWeights) { - auto weightsData = layer->insData[PER_SAMPLE_WEIGHTS_IDX].lock(); - if (weightsData == nullptr) - IE_THROW() << logPrefix << "has nullable weights data"; - if (weightsData->getTensorDesc().getDims() != indicesData->getTensorDesc().getDims()) - IE_THROW() << logPrefix << "must have equal shapes for indices and per_sample_weights inputs."; - } - - LayerConfig config; - config.inConfs.resize(layer->insData.size()); - for (int i = 0; i < layer->insData.size(); i++) { - auto data = layer->insData[i].lock(); - if (data == nullptr) - IE_THROW() << logPrefix << "has nullable input data"; - auto prc = data->getTensorDesc().getPrecision(); - if (prc == Precision::BF16) - prc = Precision::FP32; - config.inConfs[i].desc = TensorDesc(prc, - data->getTensorDesc().getDims(), - TensorDesc::getLayoutByDims(data->getTensorDesc().getDims())); - } - - DataConfig outConfig; - auto& outDims = layer->outData[0]->getTensorDesc().getDims(); - outConfig.desc = TensorDesc(dataPrecision, - outDims, - TensorDesc::getLayoutByDims(outDims)); - config.outConfs.push_back(outConfig); - config.dynBatchSupport = false; - - confs.push_back(config); - - const auto& inDataDims = inData->getTensorDesc().getDims(); - _embDepth = 1lu; - for (size_t i = 1lu; i < inDataDims.size(); i++) { - _embDepth *= inDataDims[i]; - } - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } -} - -StatusCode MKLDNNEmbeddingBagSum::execute( - std::vector& inputs, - std::vector& outputs, - ResponseDesc *resp) noexcept { - switch (inputs[0]->getTensorDesc().getPrecision()) { - case Precision::FP32: { - processData::value_type>(inputs, outputs); - break; - } - case Precision::I8: { - processData::value_type>(inputs, outputs); - break; - } - case Precision::U8: { - processData::value_type>(inputs, outputs); - break; - } - case Precision::I32: { - processData::value_type>(inputs, outputs); - break; - } - default: { - if (resp) { - std::string errorMsg = "EmbeddingBagSum layer does not support precision '" - + std::string(inputs[0]->getTensorDesc().getPrecision().name()) + "'"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - } - - return OK; -} - -template -void MKLDNNEmbeddingBagSum::processData( - std::vector& inputs, - std::vector& outputs) noexcept { - const T* srcData = inputs[0]->cbuffer().as() + - inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - T* dstData = outputs[0]->buffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const T* weightsData = nullptr; - if (_withWeights) - weightsData = inputs[PER_SAMPLE_WEIGHTS_IDX]->cbuffer().as(); - initFromInputs(inputs); - - const auto& inDataDims = inputs[0]->getTensorDesc().getDims(); - - const size_t outputBagsNum = outputs[0]->getTensorDesc().getDims()[0]; - - auto threadBody = [&](const 
int ithr, const int nthr) { - size_t start(0lu), end(0lu); - splitter(outputBagsNum, nthr, ithr, start, end); - if (start >= end) - return; - - size_t indicesSize = 0lu; - const size_t* indices = nullptr; - size_t weightsIdx = 0lu; - bool withWeights = _withWeights; - - for (size_t obi = start; obi < end; obi++) { - size_t dstIndex = obi * _embDepth; - getIndices(obi, indices, indicesSize, weightsIdx, withWeights); - - if (indices != nullptr) { - withWeights = withWeights & _withWeights; - - size_t inIdx = 0lu; - if (indices[inIdx] >= inDataDims[0]) - IE_THROW() << "EmbeddingBagSum layer '" << _layerName - << "' has invalid embedding bag index: " << indices[inIdx]; - size_t srcIndex = indices[inIdx] * _embDepth; - - if (withWeights) { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] = srcData[srcIndex + i] * weightsData[weightsIdx]; - } - weightsIdx++; - } else { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] = srcData[srcIndex + i]; - } - } - - for (inIdx = 1lu; inIdx < indicesSize; inIdx++) { - if (indices[inIdx] >= inDataDims[0]) - IE_THROW() << "EmbeddingBagSum layer '" << _layerName - << "' has invalid embedding bag index: " << indices[inIdx]; - size_t srcIndex = indices[inIdx] * _embDepth; - - if (withWeights) { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] += srcData[srcIndex + i] * weightsData[weightsIdx]; - } - weightsIdx++; - } else { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] += srcData[srcIndex + i]; - } - } - } - } else { - for (size_t i = 0lu; i < _embDepth; i++) { - dstData[dstIndex + i] = 0; - } - } - } - }; - - parallel_nt(0, threadBody); -} diff --git a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_sum.hpp b/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_sum.hpp deleted file mode 100644 index 85b625810af..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/embedding_bag_sum.hpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "base.hpp" - -#include -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class MKLDNNEmbeddingBagSum : public ExtLayerBase { -public: - MKLDNNEmbeddingBagSum( - const CNNLayer* layer, - size_t requiredInputsNum, - size_t indicesIdx, - size_t perSampleWeightsIdx, - size_t defaultIndexIdx, - const std::set& supportedPrecisions = {}); - - StatusCode execute( - std::vector& inputs, - std::vector& outputs, - ResponseDesc *resp) noexcept override; - -protected: - virtual void initFromInputs(std::vector& inputs) = 0; - virtual void getIndices( - size_t embIndex, - const size_t*& indicesRef, - size_t& size, - size_t& weightsIdx, - bool& withWeights) = 0; - - template - void processData(std::vector& inputs, std::vector& outputs) noexcept; - - std::set _supportedPrecisions; - - const size_t INDICES_IDX; - const size_t PER_SAMPLE_WEIGHTS_IDX; - const size_t DEFAULT_INDEX_IDX; - - bool _withWeights = false; - size_t _embDepth = 0; - std::string _layerName; - - using INT32 = PrecisionTrait::value_type; - using INT64 = PrecisionTrait::value_type; - using UINT64 = PrecisionTrait::value_type; - - static const std::set _supportedIndicesTypeSize; -}; - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/embedding_segments_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/embedding_segments_sum.cpp deleted file mode 100644 index 
26c70824a88..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/embedding_segments_sum.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "embedding_bag_sum.hpp" -#include "common/cpu_memcpy.h" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class EmbeddingSegmentsSumImpl: public MKLDNNEmbeddingBagSum { -public: - explicit EmbeddingSegmentsSumImpl(const CNNLayer* layer) : - MKLDNNEmbeddingBagSum(layer, 4lu, 1lu, 5lu, 4lu) { - std::string errPrefix = std::string("EmbeddingSegmentsSum layer with name '") + _layerName + "' "; - auto indicesData = layer->insData[INDICES_IDX].lock(); - if (indicesData == nullptr) - IE_THROW() << errPrefix << "has nullable indices data."; - if (indicesData->getTensorDesc().getDims().size() != 1) - IE_THROW() << errPrefix << "has indices data with invalid shape: " - << indicesData->getTensorDesc().getDims().size(); - - auto segmentIdData = layer->insData[SEGMENT_ID_IDX].lock(); - if (segmentIdData == nullptr) - IE_THROW() << errPrefix << "has invalid segmentID data."; - if (segmentIdData->getTensorDesc().getDims().size() != 1) - IE_THROW() << errPrefix << "has invalid segmentID data shape: " - << segmentIdData->getTensorDesc().getDims().size(); - - auto numSegmentData = layer->insData[NUM_SEGMENTS_IDX].lock(); - if (numSegmentData == nullptr) - IE_THROW() << errPrefix << "has nullable numSegmentID data."; - - if (_supportedIndicesTypeSize.find(indicesData->getTensorDesc().getPrecision().size()) - == _supportedIndicesTypeSize.end() - || _supportedIndicesTypeSize.find(segmentIdData->getTensorDesc().getPrecision().size()) - == _supportedIndicesTypeSize.end() - || _supportedIndicesTypeSize.find(numSegmentData->getTensorDesc().getPrecision().size()) - == _supportedIndicesTypeSize.end()) - IE_THROW() << errPrefix << "has unsupported input data type."; - - _indices = std::vector(indicesData->getTensorDesc().getDims()[0], 0lu); - _segmentIds = std::vector(segmentIdData->getTensorDesc().getDims()[0], 0lu); - } - - void initFromInputs(std::vector& inputs) override { - // Initialize indices - if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) { - const INT32* src = inputs[INDICES_IDX]->cbuffer().as(); - for (size_t i = 0lu; i < inputs[INDICES_IDX]->size(); i++) - _indices[i] = static_cast(src[i]); - } else if (inputs[INDICES_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) { - const UINT64* src = inputs[INDICES_IDX]->cbuffer().as(); - cpu_memcpy(_indices.data(), src, inputs[INDICES_IDX]->byteSize()); - } - - // Initialize segments ids - if (inputs[SEGMENT_ID_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) { - const INT32* src = inputs[SEGMENT_ID_IDX]->cbuffer().as(); - for (size_t i = 0lu; i < inputs[SEGMENT_ID_IDX]->size(); i++) - _segmentIds[i] = static_cast(src[i]); - } else if (inputs[SEGMENT_ID_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) { - const UINT64* src = inputs[SEGMENT_ID_IDX]->cbuffer().as(); - cpu_memcpy(_segmentIds.data(), src, inputs[SEGMENT_ID_IDX]->byteSize()); - } - - if (inputs.size() > NUM_SEGMENTS_IDX) { - if (inputs[NUM_SEGMENTS_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) { - const INT32* src = inputs[NUM_SEGMENTS_IDX]->cbuffer().as(); - _numSegments = static_cast(*src); - } else if (inputs[NUM_SEGMENTS_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) { - const INT64* src = inputs[NUM_SEGMENTS_IDX]->cbuffer().as(); - 
_numSegments = *src; - } - } - - // Initialize default index - _defaultIndices.clear(); - if (inputs.size() > DEFAULT_INDEX_IDX) { - if (inputs[DEFAULT_INDEX_IDX]->getTensorDesc().getPrecision().size() == sizeof(INT32)) { - const INT32* src = inputs[DEFAULT_INDEX_IDX]->cbuffer().as(); - _defaultIndices.push_back(static_cast(*src)); - } else if (inputs[DEFAULT_INDEX_IDX]->getTensorDesc().getPrecision().size() == sizeof(UINT64)) { - const INT64* src = inputs[DEFAULT_INDEX_IDX]->cbuffer().as(); - _defaultIndices.push_back(*src); - } - } - } - - void getIndices(size_t embIndex, const size_t*& indices, size_t& size, size_t& weightsIdx, bool& withWeight) override { - if (embIndex >= _numSegments) - IE_THROW() << "Invalid embedding bag index."; - - indices = nullptr; - size = 0lu; - withWeight = true; - - for (size_t si = 0; si < _indices.size(); si++) { - if (_segmentIds[si] == embIndex) { - size++; - if (indices == nullptr) { - indices = _indices.data() + si; - weightsIdx = si; - } - } - } - - // Empty bag - if (size == 0) { - size = 1lu; - withWeight = false; - if (_defaultIndices.size() == 1lu) - indices = _defaultIndices.data(); - return; - } - } - -protected: - const size_t SEGMENT_ID_IDX = 2lu; - const size_t NUM_SEGMENTS_IDX = 3lu; - - size_t _numSegments = 0lu; - - std::vector _indices; - std::vector _segmentIds; - std::vector _defaultIndices; -}; - -REG_FACTORY_FOR(EmbeddingSegmentsSumImpl, EmbeddingSegmentsSum); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/extract_image_patches.cpp b/inference-engine/src/mkldnn_plugin/nodes/extract_image_patches.cpp index a8f32387ec8..b0f0aa5d327 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/extract_image_patches.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/extract_image_patches.cpp @@ -10,6 +10,9 @@ #include #include #include +#include + +using namespace MKLDNNPlugin; namespace InferenceEngine { namespace Extensions { @@ -267,41 +270,65 @@ private: } }; -ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) { +bool ExtractImagePatchesImpl::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - std::string errorPrefix = std::string("Layer ") + layer->type + " with name '" + layer->name + "' "; - if (details::CaselessEq()("ExtractImagePatchesLayer", layer->type)) - IE_THROW() << errorPrefix << "is not an instance of ExtractImagePatchesLayer class"; + const auto extImgPatcher = std::dynamic_pointer_cast(op); + if (!extImgPatcher) { + errorMessage = "Only opset3 ExtractImagePatches operation is supported"; + return false; + } + const auto padValue = extImgPatcher->get_auto_pad(); + if (!one_of(padValue, ngraph::op::PadType::VALID, ngraph::op::PadType::SAME_LOWER, ngraph::op::PadType::SAME_UPPER)) { + errorMessage = "Does not support pad type: " + ngraph::as_string(padValue); + return false; + } + if (!everyone_is(2, extImgPatcher->get_sizes().size(), extImgPatcher->get_strides().size(), extImgPatcher->get_rates().size())) { + errorMessage = "Doesn't support 'sizes', 'strides', 'rates', attributes with rank != 2"; + return false; + } + } catch (...) { + return false; + } + return true; +} - if (layer->insData.size() != 1 || layer->outData.size() != 1) - IE_THROW() << errorPrefix << "has incorrect number of input or output edges!" 
- << " Input: " << layer->insData.size() << "; Output: " << layer->outData.size(); +ExtractImagePatchesImpl::ExtractImagePatchesImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - auto inData = layer->insData[0].lock(); - if (inData == nullptr) - IE_THROW() << errorPrefix << "has nullable input data"; + errorPrefix = "ExtractImagePatches layer with name '" + op->get_friendly_name() + "' "; + const auto extImgPatcher = std::dynamic_pointer_cast(op); - if (inData->getTensorDesc().getDims().size() != 4) - IE_THROW() << errorPrefix << "must have 4D input tensor. Actual: " << inData->getTensorDesc().getDims().size(); + if (op->get_input_size() != 1 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << "has incorrect number of input or output edges!" + << " Input: " << op->get_input_size() << "; Output: " << op->get_output_size(); - if (layer->outData[0]->getTensorDesc().getDims().size() != 4) - IE_THROW() << errorPrefix << "must have 4D output tensor. Actual: " << layer->outData[0]->getTensorDesc().getDims().size(); + if (op->get_input_shape(0).size() != 4) + IE_THROW() << errorPrefix << "must have 4D input tensor. Actual: " << op->get_input_shape(0).size(); - if (inData->getLayout() != NCHW) - IE_THROW() << errorPrefix << "has unsupported layout: " << inData->getLayout(); + if (op->get_output_shape(0).size() != 4) + IE_THROW() << errorPrefix << "must have 4D output tensor. Actual: " << op->get_output_shape(0).size(); - const auto precision = inData->getTensorDesc().getPrecision(); - if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end()) - IE_THROW() << errorPrefix << "has unsupported precision: " << precision.name(); + const auto precision = details::convertPrecision(op->get_input_element_type(0)); + if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end()) + IE_THROW() << errorPrefix << "has unsupported precision: " << precision.name(); + + auto ksizes = extImgPatcher->get_sizes(); + auto strides = extImgPatcher->get_strides(); + auto rates = extImgPatcher->get_rates(); + if (extImgPatcher->get_auto_pad() == ngraph::op::PadType::VALID) { + _auto_pad = ExtImgPatcherPadType::VALID; + } else if (extImgPatcher->get_auto_pad() == ngraph::op::PadType::SAME_LOWER) { + _auto_pad = ExtImgPatcherPadType::SAME_LOWER; + } else if (extImgPatcher->get_auto_pad() == ngraph::op::PadType::SAME_UPPER) { + _auto_pad = ExtImgPatcherPadType::SAME_UPPER; + } else { + IE_THROW() << errorPrefix << "has unsupported pad type: " << extImgPatcher->get_auto_pad(); + } - auto ksizes = layer->GetParamAsUInts("sizes"); - auto strides = layer->GetParamAsUInts("strides"); - auto rates = layer->GetParamAsUInts("rates"); - std::string auto_pad = layer->GetParamAsString("auto_pad"); - if (!CaselessEq()(auto_pad, "valid") - && !CaselessEq()(auto_pad, "same_upper") - && !CaselessEq()(auto_pad, "same_lower")) - IE_THROW() << errorPrefix << "has unsupported auto_pad value: " << auto_pad; if (ksizes.size() != 2 || strides.size() != 2 || rates.size() != 2) IE_THROW() << errorPrefix << "must have the following attributes with shape {2}: sizes, strides, rates."; _ksizes.clear(); @@ -323,12 +350,12 @@ ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) { _rates.push_back(static_cast(x)); } - SizeVector in_dims = inData->getTensorDesc().getDims(); + SizeVector in_dims = op->get_input_shape(0); _pad_left = 0; _pad_top = 0; 
jit_extract_image_patches_params jpp; jpp.need_padding = false; - if (!CaselessEq()(auto_pad, "valid")) { + if (_auto_pad != ExtImgPatcherPadType::VALID) { const size_t iheight = in_dims[2]; const size_t iwidth = in_dims[3]; const int64_t ihStep = _ksizes[0] + (_rates[0] - 1) * (_ksizes[0] - 1); @@ -338,9 +365,9 @@ ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) { int64_t PH = (std::ceil(1.f * iheight/_strides[0]) - 1) * _strides[0] + ihStep - iheight; int64_t increment_sign = 0; - if (CaselessEq()(auto_pad, "same_lower")) { + if (_auto_pad == ExtImgPatcherPadType::SAME_LOWER) { increment_sign = 1; - } else if (CaselessEq()(auto_pad, "same_upper")) { + } else if (_auto_pad == ExtImgPatcherPadType::SAME_UPPER) { increment_sign = -1; } @@ -355,14 +382,14 @@ ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) { } jpp.IW = in_dims[3]; - SizeVector out_dims = layer->outData[0]->getTensorDesc().getDims(); + SizeVector out_dims = op->get_output_shape(0); jpp.OH = out_dims[2]; jpp.OW = out_dims[3]; jpp.KH = _ksizes[0]; jpp.KW = _ksizes[1]; jpp.SH = _strides[0]; jpp.SW = _strides[1]; - jpp.dtype_size = layer->insData.front().lock()->getPrecision().size(); + jpp.dtype_size = precision.size(); jpp.block_size = 1; if (mayiuse(x64::avx512_common)) { @@ -379,26 +406,13 @@ ExtractImagePatchesImpl::ExtractImagePatchesImpl(const CNNLayer* layer) { if (extract_image_patches_kernel) extract_image_patches_kernel->create_ker(); - LayerConfig config; - - DataConfig inConfig; - inConfig.desc = inData->getTensorDesc(); - config.inConfs.push_back(inConfig); - - DataConfig outConfig; - outConfig.desc = layer->outData[0]->getTensorDesc(); - outConfig.desc.setPrecision(inConfig.desc.getPrecision()); - outConfig.desc.setLayout(inConfig.desc.getLayout()); - config.outConfs.push_back(outConfig); - - config.dynBatchSupport = false; - confs.push_back(config); + addConfig(op, {{TensorDescCreatorTypes::ncsp, precision}}, + {{TensorDescCreatorTypes::ncsp, precision}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); } } - StatusCode ExtractImagePatchesImpl::execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept { const char *src_data = inputs[0]->cbuffer().as() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/extract_image_patches.hpp b/inference-engine/src/mkldnn_plugin/nodes/extract_image_patches.hpp index 0effc66b565..8cbd9c4f605 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/extract_image_patches.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/extract_image_patches.hpp @@ -42,10 +42,17 @@ struct jit_uni_extract_image_patches_kernel { class ExtractImagePatchesImpl : public ExtLayerBase { public: - explicit ExtractImagePatchesImpl(const CNNLayer*); + explicit ExtractImagePatchesImpl(const std::shared_ptr& op); StatusCode execute(std::vector&, std::vector&, ResponseDesc*) noexcept override; + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: + enum class ExtImgPatcherPadType { + VALID, + SAME_LOWER, + SAME_UPPER + }; + std::vector _ksizes; std::vector _strides; std::vector _rates; @@ -53,6 +60,10 @@ private: size_t _pad_top; std::shared_ptr extract_image_patches_kernel; static const std::set _supported_precisions_sizes; + + ExtImgPatcherPadType _auto_pad; + + std::string errorPrefix; }; REG_FACTORY_FOR(ExtractImagePatchesImpl, ExtractImagePatches); diff --git a/inference-engine/src/mkldnn_plugin/nodes/fill.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/fill.cpp deleted file mode 100644 index 08100f1b2c3..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/fill.cpp +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class FillImpl: public ExtLayerBase { -public: - explicit FillImpl(const CNNLayer* layer) { - try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - - if (layer->insData.size() != 2) - IE_THROW() << layer->name << " Incorrect number of input edges!"; - - SizeVector fill_dims = layer->insData[FILL_DIMS].lock()->getTensorDesc().getDims(); - if (fill_dims.size() > 1) - IE_THROW() << layer->name << " Fill dimensions vector should be 1 dimension"; - - SizeVector value_dims = layer->insData[FILL_VALUE].lock()->getTensorDesc().getDims(); - if (value_dims.size() > 1) - IE_THROW() << layer->name << " Value scalar should have 1 dimension"; - - if (!(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::I32 && - layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) && - !(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::FP32 && - layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) { - addConfig(layer, { DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN, Precision::FP32) }, - { DataConfigurator(ConfLayout::PLN, Precision::FP32) }); - } else { - addConfig(layer, { DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN) }, - { DataConfigurator(ConfLayout::PLN) }); - } - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - int32_t* fill_dims = inputs[FILL_DIMS]->cbuffer().as() + - inputs[FILL_DIMS]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - size_t fill_size = inputs[FILL_DIMS]->getTensorDesc().getDims()[0]; - SizeVector dst_dims = outputs[0]->getTensorDesc().getDims(); - - if (dst_dims.size() != fill_size) { - if (resp) { - std::string errorMsg = "Output tensor dimension mismatch"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - - size_t work_amount_dst = 1; - for (size_t i = 0; i < dst_dims.size(); i++) { - work_amount_dst *= fill_dims[i]; - if (static_cast(dst_dims[i]) != fill_dims[i]) { - if (resp) { - std::string errorMsg = "Output tensor dimension size mismatch"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - } - - switch (outputs[0]->getTensorDesc().getPrecision()) { - case Precision::FP32: { - float* dst_data = outputs[0]->cbuffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - float value = (inputs[FILL_VALUE]->cbuffer().as() + - inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0]; - - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t start = 0, end = 0; - splitter(work_amount_dst, nthr, ithr, start, end); - std::fill_n(dst_data + start, end - start, value); - }); - } - break; - case Precision::I32: { - int32_t* dst_data = outputs[0]->cbuffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - 
int32_t value = (inputs[FILL_VALUE]->cbuffer().as() + - inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0]; - - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t start = 0, end = 0; - splitter(work_amount_dst, nthr, ithr, start, end); - std::fill_n(dst_data + start, end - start, value); - }); - return OK; - } - break; - default: - if (resp) { - std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - - return OK; - } - -private: - const size_t FILL_DIMS = 0; - const size_t FILL_VALUE = 1; -}; - -REG_FACTORY_FOR(FillImpl, Fill); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp deleted file mode 100644 index 9479e1d2e58..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include -#include -#include "ie_parallel.hpp" -#include "common/cpu_memcpy.h" -#include "common/fp16_utils.h" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class GatherImpl: public ExtLayerBase { -public: - explicit GatherImpl(const CNNLayer* layer) { - try { - if (layer->insData.size() != 2 || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - - Precision inIdxPrecision = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getPrecision(); - if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16) - inIdxPrecision = Precision::I32; - - axis = layer->GetParamAsInt("axis"); - - const SizeVector& dictionary_dims = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getDims(); - if (dictionary_dims.size() == 0) - IE_THROW() << layer->name << " Incorrect input parameters dimension!"; - // Dictionary must be at least rank axis + 1 - IE_ASSERT(-static_cast(dictionary_dims.size()) <= axis && axis < static_cast(dictionary_dims.size())) - << layer->name << " Incorrect input parameters dimensions and axis number!"; - if (axis < 0) - axis += dictionary_dims.size(); - - // Find number of dictionaries, index range and data length - for (int i = 0; i < axis; i++) - numDictionaries *= dictionary_dims[i]; - indexRange = dictionary_dims[axis]; - for (size_t i = axis + 1; i < dictionary_dims.size(); i++) - dataLength *= dictionary_dims[i]; - - if (dataLength == 0) - IE_THROW() << layer->name << " Incorrect input parameters dimension!"; - - LayerConfig config; - DataConfig dataConfigIdx, dataConfigDct; - Precision dataPrecision = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getPrecision(); - dataConfigDct.desc = TensorDesc(dataPrecision, dictionary_dims, - layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getLayoutByDims(dictionary_dims)); - config.inConfs.push_back(dataConfigDct); - const SizeVector& indexes_dims = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getDims(); - dataConfigIdx.desc = TensorDesc(inIdxPrecision, indexes_dims, - layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getLayout()); - config.inConfs.push_back(dataConfigIdx); - - DataConfig dataConfigOut; - const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims(); - dataConfigOut.desc 
= TensorDesc(dataPrecision, out_dims, - layer->outData[0]->getTensorDesc().getLayoutByDims(out_dims)); - config.outConfs.push_back(dataConfigOut); - config.dynBatchSupport = false; - confs.push_back(config); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - struct f32toUi32 { - inline unsigned int operator()(const float value) { - return static_cast(value); - } - }; - - struct f16toUi32 { - inline unsigned int operator()(const ie_fp16 value) { - return static_cast(f16tof32(value)); - } - }; - - struct i32toUi32 { - inline unsigned int operator()(const int32_t value) { - return static_cast(value); - } - }; - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - switch (inputs[GATHER_INDEXES]->getTensorDesc().getPrecision()) { - case Precision::FP32: - gather(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]); - break; - case Precision::FP16: - gather(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]); - break; - case Precision::I32: - gather(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]); - break; - default: - return GENERAL_ERROR; - } - - return OK; - } - -private: - template - void gather(Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output) { - size_t src_indexSize = indexes->size(); - const index_t *src_index = indexes->cbuffer().as() + indexes->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const uint8_t *src_dataDict = dictionary->cbuffer().as() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding(); - uint8_t *dst_data = output->cbuffer().as() + output->getTensorDesc().getBlockingDesc().getOffsetPadding(); - size_t len = dataLength * dictionary->getTensorDesc().getPrecision().size(); - - parallel_for(src_indexSize, [&](size_t i) { - unsigned int idx = Conversion()(src_index[i]); - - // Index clipping - if (idx < indexRange) { - // Copying data to destination from Dictionary - for (size_t j = 0; j < numDictionaries; j++) { - cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)], - output->byteSize() - (len * (i + j * src_indexSize)), - &src_dataDict[len * (idx + j * indexRange)], - len); - } - } else { - for (size_t j = 0; j < numDictionaries; j++) { - memset(&dst_data[len * (i + j * src_indexSize)], 0, len); - } - } - }); - } - - int axis = 0; - size_t numDictionaries = 1; - size_t indexRange = 0; - size_t dataLength = 1; - const size_t GATHER_DICTIONARY = 0; - const size_t GATHER_INDEXES = 1; -}; - - -REG_FACTORY_FOR(GatherImpl, Gather); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather_elements.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather_elements.cpp deleted file mode 100644 index af01f51e4d2..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/gather_elements.cpp +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class GatherElementsImpl: public ExtLayerBase { -public: - explicit GatherElementsImpl(const CNNLayer* layer) : strideAx1Diff_(0) { - errorPrefix_ = std::string("Layer GatherElements with name '") + layer->name + "'"; - - if (layer->insData.size() != 2 || layer->outData.size() != 1) - IE_THROW() << errorPrefix_ << " has invalid number of input/output edges."; - - auto inputData = 
layer->insData[dataIndex_].lock(); - auto indices = layer->insData[indicesIndex_].lock(); - if (!inputData || !indices) - IE_THROW() << errorPrefix_ << " has nullable inputs."; - - const auto& dataDims = inputData->getTensorDesc().getDims(); - const auto& indicesDims = indices->getTensorDesc().getDims(); - if (dataDims.size() != indicesDims.size()) - IE_THROW() << errorPrefix_ << " has invalid input shapes. Inputs 'Data' and 'Indices' must have equal ranks."; - - Precision dataPrecision = inputData->getTensorDesc().getPrecision(); - if (dataPrecision.size() != sizeof(PrecisionTrait::value_type) && - dataPrecision.size() != sizeof(PrecisionTrait::value_type) && - dataPrecision.size() != sizeof(PrecisionTrait::value_type)) { - IE_THROW() << errorPrefix_ << " has unsupported 'inputData' input precision: " << dataPrecision; - } - - Precision indicesPrecision = indices->getTensorDesc().getPrecision(); - if (indicesPrecision != Precision::I32) { - IE_THROW() << errorPrefix_ << " has unsupported 'indices' input precision: " << indicesPrecision; - } - - dataTypeSize_ = dataPrecision.size(); - - int axis = layer->GetParamAsInt("axis"); - if (axis < 0) - axis += dataDims.size(); - if (axis < 0 || axis >= static_cast(dataDims.size())) - IE_THROW() << errorPrefix_ << " has invalid axis attribute: " << axis; - axis_ = axis; - - auto& outputData = layer->outData[0]; - strideAxDst_ = outputData->getTensorDesc().getBlockingDesc().getStrides()[axis_]; - dstAxDim_ = outputData->getTensorDesc().getDims()[axis_]; - if (axis_ > 0) { - strideAx1Diff_ = inputData->getTensorDesc().getBlockingDesc().getStrides()[axis_ - 1] - - outputData->getTensorDesc().getBlockingDesc().getStrides()[axis_ - 1]; - } - - LayerConfig config; - DataConfig dataConfig, indicesConfig, outConfig; - dataConfig.desc = TensorDesc(dataPrecision, dataDims, - inputData->getTensorDesc().getLayoutByDims(dataDims)); - config.inConfs.push_back(dataConfig); - indicesConfig.desc = TensorDesc(Precision::I32, indicesDims, - indices->getTensorDesc().getLayoutByDims(indicesDims)); - config.inConfs.push_back(indicesConfig); - - const auto& outDims = outputData->getTensorDesc().getDims(); - outConfig.desc = TensorDesc(dataPrecision, outDims, - outputData->getTensorDesc().getLayoutByDims(outDims)); - config.outConfs.push_back(outConfig); - - config.dynBatchSupport = false; - - confs.push_back(config); - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - switch (dataTypeSize_) { - case sizeof(PrecisionTrait::value_type): - return directExecution::value_type>(inputs, outputs, resp); - case sizeof(PrecisionTrait::value_type): - return directExecution::value_type>(inputs, outputs, resp); - case sizeof(PrecisionTrait::value_type): - return directExecution::value_type>(inputs, outputs, resp); - default: - std::string errMsg = errorPrefix_ + " has inputData input with unsupported precision: " + - inputs[dataIndex_]->getTensorDesc().getPrecision().name(); - errMsg.copy(resp->msg, sizeof(resp->msg) - 1); - return GENERAL_ERROR; - } - } - -protected: - template - StatusCode directExecution(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept { - const dataType* srcData = inputs[dataIndex_]->cbuffer().as() + - inputs[dataIndex_]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const int* indices = inputs[indicesIndex_]->cbuffer().as() + - inputs[indicesIndex_]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - dataType* dstData = outputs[0]->buffer().as() + - 
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - const int outSize = outputs[0]->size(); - auto threadBody = [&](const int ithr, const int nthr) { - int start(0lu), end(0lu); - splitter(outSize, nthr, ithr, start, end); - if (start >= end) - return; - - int axStrideIt = start % strideAxDst_; - int dstAxIdx = (start / strideAxDst_) % dstAxDim_; - int dstShift0 = (start / strideAxDst_ / dstAxDim_) * strideAx1Diff_; - - for (size_t o = start; o < end; o++, axStrideIt++) { - if (axStrideIt == strideAxDst_) { - axStrideIt = 0; - dstAxIdx++; - if (dstAxIdx == dstAxDim_) { - dstAxIdx = 0; - dstShift0 += strideAx1Diff_; - } - } - dstData[o] = srcData[o + dstShift0 + (indices[o] - dstAxIdx) * strideAxDst_]; - } - }; - parallel_nt(0, threadBody); - - return OK; - } - - const size_t dataIndex_ = 0; - const size_t indicesIndex_ = 1; - - size_t axis_; - size_t dataTypeSize_; - int strideAxDst_; - int dstAxDim_; - int strideAx1Diff_; - std::string errorPrefix_; -}; - -REG_FACTORY_FOR(GatherElementsImpl, GatherElements); -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather_nd.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather_nd.cpp deleted file mode 100644 index 61d80aecf23..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/gather_nd.cpp +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include "ie_parallel.hpp" -#include "common/cpu_memcpy.h" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class GatherNDImpl: public ExtLayerBase { -public: - explicit GatherNDImpl(const CNNLayer* layer) { - _errorPrefix = std::string("Layer GatherND with name '") + layer->name + "'"; - - if (layer->insData.size() != 2 || layer->outData.size() != 1) - IE_THROW() << _errorPrefix << " has invalid number of input/output edges."; - - auto data = layer->insData[_dataIndex].lock(); - auto indices = layer->insData[_indicesIndex].lock(); - if (!data || !indices) - IE_THROW() << _errorPrefix << " has nullable inputs."; - Precision dataPrecision = data->getTensorDesc().getPrecision(); - if (dataPrecision.size() != sizeof(PrecisionTrait::value_type) && - dataPrecision.size() != sizeof(PrecisionTrait::value_type) && - dataPrecision.size() != sizeof(PrecisionTrait::value_type)) { - IE_THROW() << _errorPrefix << " has unsupported 'data' input precision: " << dataPrecision; - } - - Precision indicesPrecision = indices->getTensorDesc().getPrecision(); - if (indicesPrecision != Precision::I32 && - indicesPrecision != Precision::I16 && indicesPrecision != Precision::U16 && - indicesPrecision != Precision::I8 && indicesPrecision != Precision::U8) { - IE_THROW() << _errorPrefix << " has unsupported 'indices' input precision: " << indicesPrecision; - } - - _dataTypeSize = dataPrecision.size(); - const auto& dataDims = data->getTensorDesc().getDims(); - const auto& indicesDims = indices->getTensorDesc().getDims(); - - _batchDims = layer->GetParamAsInt("batch_dims", 0); - if (_batchDims >= std::min(dataDims.size(), indicesDims.size())) - IE_THROW() << _errorPrefix << " has invalid batch_dims attribute: " << _batchDims; - - _batchNum = 1lu; - for (size_t i = 0; i < _batchDims; i++) { - _batchNum *= indicesDims[i]; - } - - _sliceRank = indicesDims[indicesDims.size() - 1]; - _dataRank = dataDims.size() - _batchDims; - if (_sliceRank > _dataRank) - IE_THROW() << _errorPrefix << " has 
invalid inputs shapes."; - - _blockSize = 1; - for (size_t i = _sliceRank + _batchDims; i < dataDims.size(); i++) { - _blockSize *= dataDims[i]; - } - _batchStep = 1; - for (size_t i = _batchDims; i < dataDims.size(); i++) { - _batchStep *= dataDims[i]; - } - - LayerConfig config; - DataConfig dataConfig, indicesConfig, outConfig; - dataConfig.desc = TensorDesc(dataPrecision, dataDims, - data->getTensorDesc().getLayoutByDims(dataDims)); - config.inConfs.push_back(dataConfig); - indicesConfig.desc = TensorDesc(Precision::I32, indicesDims, - indices->getTensorDesc().getLayoutByDims(indicesDims)); - config.inConfs.push_back(indicesConfig); - - const auto& outDims = layer->outData[0]->getTensorDesc().getDims(); - outConfig.desc = TensorDesc(dataPrecision, outDims, - layer->outData[0]->getTensorDesc().getLayoutByDims(outDims)); - config.outConfs.push_back(outConfig); - config.dynBatchSupport = false; - - confs.push_back(config); - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - if (_blockSize > 1) { - gatherBlocks(inputs, outputs, resp); - } else { - switch (_dataTypeSize) { - case sizeof(PrecisionTrait::value_type): - gatherElementwise::value_type>(inputs, outputs, resp); - break; - case sizeof(PrecisionTrait::value_type): - gatherElementwise::value_type>(inputs, outputs, resp); - break; - case sizeof(PrecisionTrait::value_type): - gatherElementwise::value_type>(inputs, outputs, resp); - break; - default: - std::string errMsg = _errorPrefix + " has data input with unsupported precision: " + - inputs[_dataIndex]->getTensorDesc().getPrecision().name(); - errMsg.copy(resp->msg, sizeof(resp->msg) - 1); - return GENERAL_ERROR; - } - } - - return OK; - } - -protected: - template - void gatherElementwise(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept { - const dataType* srcData = inputs[_dataIndex]->cbuffer().as() + - inputs[_dataIndex]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const int* indices = inputs[_indicesIndex]->cbuffer().as() + - inputs[_indicesIndex]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - dataType* dstData = outputs[0]->buffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - const size_t* srcMultipliers = inputs[_dataIndex]->getTensorDesc().getBlockingDesc().getStrides().data() + _batchDims; - - const size_t cycles = outputs[0]->byteSize() / (sizeof(dataType) * _batchNum); - const size_t CS = cycles * _sliceRank; - const size_t CB = cycles * _blockSize; - const size_t workAmount = _batchNum * cycles; - - auto threadBody = [&](const int ithr, const int nthr) { - size_t start(0lu), end(0lu); - splitter(workAmount, nthr, ithr, start, end); - if (start >= end) - return; - size_t bStart = start / cycles; - size_t cStart = start % cycles; - size_t workCounter = start; - - const dataType* shiftedSrcData = srcData + bStart * _batchStep; - const int* shiftedIndices = indices + bStart * CS + cStart * _sliceRank; - dataType* shiftedDstData = dstData + bStart * CB + cStart * _blockSize; - - for (size_t b = bStart; b < _batchNum; b++) { - for (size_t j = cStart; j < cycles; j++) { - size_t dataIdx = 0lu; - for (size_t i = 0lu; i < _sliceRank; i++) - dataIdx += srcMultipliers[i] * shiftedIndices[i]; - shiftedDstData[0] = shiftedSrcData[dataIdx]; - shiftedDstData++; - shiftedIndices += _sliceRank; - if (++workCounter == end) { - return; - } - } - cStart = 0lu; - shiftedSrcData += _batchStep; - } - }; - - parallel_nt(0, threadBody); - } - - void 
gatherBlocks(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept { - const uint8_t* srcData = inputs[_dataIndex]->cbuffer().as() + - inputs[_dataIndex]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const int* indices = inputs[_indicesIndex]->cbuffer().as() + - inputs[_indicesIndex]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - uint8_t* dstData = outputs[0]->buffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - std::vector srcMultipliers(_sliceRank); - for (size_t i = 0; i < _sliceRank ; i++) - srcMultipliers[i] = _dataTypeSize * inputs[_dataIndex]->getTensorDesc().getBlockingDesc().getStrides()[i + _batchDims]; - - const size_t batchStep = _batchStep * _dataTypeSize; - const size_t dataStep = _blockSize * _dataTypeSize; - const size_t cycles = outputs[0]->byteSize() / (dataStep * _batchNum); - const size_t CS = cycles * _sliceRank; - const size_t CB = cycles * dataStep; - const size_t workAmount = _batchNum * cycles; - - auto threadBody = [&](const int ithr, const int nthr) { - size_t start(0lu), end(0lu); - splitter(workAmount, nthr, ithr, start, end); - if (start >= end) - return; - size_t bStart = start / cycles; - size_t cStart = start % cycles; - size_t workCounter = start; - - const uint8_t* shiftedSrcData = srcData + bStart * batchStep; - const int* shiftedIndices = indices + bStart * CS + cStart * _sliceRank; - uint8_t* shiftedDstData = dstData + bStart * CB + cStart * dataStep; - - for (size_t b = bStart; b < _batchNum; b++) { - for (size_t j = cStart; j < cycles; j++) { - size_t dataIdx = 0lu; - for (size_t i = 0; i < _sliceRank ; i++) - dataIdx += srcMultipliers[i] * shiftedIndices[i]; - cpu_memcpy(shiftedDstData, &(shiftedSrcData[dataIdx]), dataStep); - shiftedDstData += dataStep; - shiftedIndices += _sliceRank; - if (++workCounter == end) { - return; - } - } - cStart = 0; - shiftedSrcData += batchStep; - } - }; - - parallel_nt(0, threadBody); - } - - size_t _dataRank; - size_t _sliceRank; - size_t _blockSize; - size_t _batchDims; - size_t _batchNum; - size_t _batchStep; - size_t _dataTypeSize; - const size_t _dataIndex = 0; - const size_t _indicesIndex = 1; - std::string _errorPrefix; -}; - - -REG_FACTORY_FOR(GatherNDImpl, GatherND); -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp index e7689b1209b..4ea74721adc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp @@ -3,6 +3,9 @@ // #include "base.hpp" +#include +#include +#include #include #include @@ -17,45 +20,71 @@ namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + class GatherTreeImpl: public ExtLayerBase { public: - explicit GatherTreeImpl(const CNNLayer* layer) { + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges."; + auto gatherElementsOp = ngraph::as_type_ptr(op); + if (!gatherElementsOp) { + errorMessage = "Node is not an instance of the GatherTree operation from operation set v1."; + return false; + } - if (layer->insData.size() != 4) - IE_THROW() << layer->name << " Incorrect number of input edges."; - if (layer->outData.size() != 1) - IE_THROW() << layer->name << 
" Incorrect number of output edges."; - - precision = layer->insData[GATHER_TREE_STEP_IDX].lock()->getTensorDesc().getPrecision(); - if (precision != Precision::FP32 && precision != Precision::I32) - precision = Precision::FP32; - - if (layer->insData[GATHER_TREE_PARENT_IDX].lock()->getTensorDesc().getPrecision() != precision || - layer->insData[GATHER_TREE_MAX_SEQ_LEN].lock()->getTensorDesc().getPrecision() != precision || - layer->insData[GATHER_TREE_END_TOKEN].lock()->getTensorDesc().getPrecision() != precision || - layer->outData[0]->getTensorDesc().getPrecision() != precision) - IE_THROW() << layer->name << " Incorrect input/output data tensor precision. Should be the same."; - - if (layer->insData[GATHER_TREE_STEP_IDX].lock()->getTensorDesc().getDims().size() != 3) - IE_THROW() << layer->name << " step_idx vector should be 3 dimension"; - if (layer->insData[GATHER_TREE_PARENT_IDX].lock()->getTensorDesc().getDims().size() != 3) - IE_THROW() << layer->name << " parent_idx vector should be 3 dimension"; - if (layer->insData[GATHER_TREE_MAX_SEQ_LEN].lock()->getTensorDesc().getDims().size() != 1) - IE_THROW() << layer->name << " max_seq_len vector should be 1 dimension"; - if (layer->insData[GATHER_TREE_END_TOKEN].lock()->getTensorDesc().getDims().size() != 1) - IE_THROW() << layer->name << " end_token should be 1 dimension"; - - addConfig(layer, { DataConfigurator(ConfLayout::PLN, precision), DataConfigurator(ConfLayout::PLN, precision), - DataConfigurator(ConfLayout::PLN, precision), DataConfigurator(ConfLayout::PLN, precision) }, - { DataConfigurator(ConfLayout::PLN, precision) }); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); + auto precision = op->get_input_element_type(GATHER_TREE_STEP_IDX); + if (!MKLDNNPlugin::one_of(precision, ngraph::element::f32, ngraph::element::i32)) + precision = ngraph::element::f32; + if (op->get_input_element_type(GATHER_TREE_PARENT_IDX) != precision || + op->get_input_element_type(GATHER_TREE_MAX_SEQ_LEN) != precision || + op->get_input_element_type(GATHER_TREE_END_TOKEN) != precision || + op->get_output_element_type(0) != precision) { + errorMessage = "Node has incorrect input/output data precision. Must be the same."; + return false; + } + } catch (...) 
{ + return false; } + + return true; } + explicit GatherTreeImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + std::string errorPrefix = std::string("Node GatherTree with name '") + op->get_friendly_name() + "'"; + if (op->get_input_size() != 4) + IE_THROW() << errorPrefix << " has incorrect number of input edges."; + if (op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of output edges."; + + precision = details::convertPrecision(op->get_input_element_type(GATHER_TREE_STEP_IDX)); + if (!MKLDNNPlugin::one_of(precision, Precision::FP32, Precision::I32)) + precision = Precision::FP32; + + if (op->get_input_shape(GATHER_TREE_STEP_IDX).size() != 3) + IE_THROW() << errorPrefix << " step_idx vector should be 3 dimension"; + if (op->get_input_shape(GATHER_TREE_PARENT_IDX).size() != 3) + IE_THROW() << errorPrefix << " parent_idx vector should be 3 dimension"; + if (op->get_input_shape(GATHER_TREE_MAX_SEQ_LEN).size() != 1) + IE_THROW() << errorPrefix << " max_seq_len vector should be 1 dimension"; + if (op->get_input_shape(GATHER_TREE_END_TOKEN).size() != 0) + IE_THROW() << errorPrefix << " end_token should be 1 dimension"; + + addConfig(op, {{TensorDescCreatorTypes::ncsp, precision}, + {TensorDescCreatorTypes::ncsp, precision}, + {TensorDescCreatorTypes::ncsp, precision}, + {TensorDescCreatorTypes::ncsp, precision}}, + {{TensorDescCreatorTypes::ncsp, precision}}); + } catch (InferenceEngine::Exception &ex) { + errorMsg = ex.what(); + throw; + } + } StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { if (precision == Precision::FP32) @@ -140,10 +169,10 @@ public: } private: - const size_t GATHER_TREE_STEP_IDX = 0; - const size_t GATHER_TREE_PARENT_IDX = 1; - const size_t GATHER_TREE_MAX_SEQ_LEN = 2; - const size_t GATHER_TREE_END_TOKEN = 3; + static const size_t GATHER_TREE_STEP_IDX = 0; + static const size_t GATHER_TREE_PARENT_IDX = 1; + static const size_t GATHER_TREE_MAX_SEQ_LEN = 2; + static const size_t GATHER_TREE_END_TOKEN = 3; InferenceEngine::Precision precision; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/grn.cpp b/inference-engine/src/mkldnn_plugin/nodes/grn.cpp index d773ecddc81..6ee077fd52f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/grn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/grn.cpp @@ -8,21 +8,48 @@ #include #include #include "ie_parallel.hpp" +#include + +using namespace MKLDNNPlugin; namespace InferenceEngine { namespace Extensions { namespace Cpu { class GRNImpl: public ExtLayerBase { -public: - explicit GRNImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 1 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; + const auto grn = std::dynamic_pointer_cast(op); + if (!grn) { + errorMessage = "Only opset1 GRN operation is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; + } - bias = layer->GetParamAsFloat("bias"); + std::string errorPrefix; - addConfig(layer, {{ConfLayout::PLN, false, 0, Precision::FP32}}, {{ConfLayout::PLN, false, 0, Precision::FP32}}); +public: + explicit GRNImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "GRN layer with name '" + op->get_friendly_name() + "'"; + const auto grn = std::dynamic_pointer_cast(op); + + if (op->get_input_size() != 1 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + bias = grn->get_bias(); + + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.hpp b/inference-engine/src/mkldnn_plugin/nodes/list.hpp index 65275b91cf6..f87d890d3c2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/list.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/list.hpp @@ -7,12 +7,12 @@ #include #include -#include #include #include #include #include +#include namespace InferenceEngine { @@ -43,7 +43,7 @@ public: namespace Extensions { namespace Cpu { -using ext_factory = std::function; +using ext_factory = std::function& op)>; struct ExtensionsHolder { std::map list; @@ -60,11 +60,11 @@ public: } virtual StatusCode - getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept { + getFactoryFor(ILayerImplFactory*& factory, const std::shared_ptr& op, ResponseDesc* resp) noexcept { using namespace MKLDNNPlugin; - factory = layersFactory.createNodeIfRegistered(MKLDNNPlugin, cnnLayer->type, cnnLayer); + factory = layersFactory.createNodeIfRegistered(MKLDNNPlugin, op->get_type_name(), op); if (!factory) { - std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!"; + std::string errorMsg = std::string("Factory for ") + op->get_type_name() + " wasn't found!"; errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); return NOT_FOUND; } @@ -85,7 +85,7 @@ public: using LayersFactory = openvino::cc::Factory< std::string, - InferenceEngine::ILayerImplFactory*(const InferenceEngine::CNNLayer*)>; + InferenceEngine::ILayerImplFactory*(const std::shared_ptr& op)>; LayersFactory layersFactory; diff --git a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp index e66af69e08f..de8fd66b708 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp @@ -7,11 +7,7 @@ # define MKLDNN_EXTENSION_NODE(__prim, __type) #endif -MKLDNN_EXTENSION_NODE(EmbeddingBagOffsetsSumImpl, EmbeddingBagOffsetsSum); -MKLDNN_EXTENSION_NODE(EmbeddingBagPackedSumImpl, EmbeddingBagPackedSum); -MKLDNN_EXTENSION_NODE(EmbeddingSegmentsSumImpl, EmbeddingSegmentsSum); MKLDNN_EXTENSION_NODE(CTCLossImpl, CTCLoss); -MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox); MKLDNN_EXTENSION_NODE(MathImpl, Abs); MKLDNN_EXTENSION_NODE(MathImpl, Acos); MKLDNN_EXTENSION_NODE(MathImpl, Acosh); @@ -38,44 +34,20 @@ MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTo MKLDNN_EXTENSION_NODE(ExtractImagePatchesImpl, ExtractImagePatches); MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence); MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput); 
-MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax); -MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze); MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput); -MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo); MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax); MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo); -MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze); -MKLDNN_EXTENSION_NODE(FillImpl, Fill); -MKLDNN_EXTENSION_NODE(UniqueImpl, Unique); -MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling); -MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot); -MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast); -MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum); -MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense); MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor); -MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage); -MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression); +MKLDNN_EXTENSION_NODE(ExperimentalDetectronGenerateProposalsSingleImageImpl, ExperimentalDetectronGenerateProposalsSingleImage); +MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppressionIEInternal); MKLDNN_EXTENSION_NODE(TopKImpl, TopK); MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels); -MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile); -MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace); MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator); -MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS); MKLDNN_EXTENSION_NODE(GRNImpl, GRN); -MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows); MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize); MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder); MKLDNN_EXTENSION_NODE(CTCGreedyDecoderSeqLenImpl, CTCGreedyDecoderSeqLen); -MKLDNN_EXTENSION_NODE(GatherImpl, Gather); -MKLDNN_EXTENSION_NODE(GatherElementsImpl, GatherElements); -MKLDNN_EXTENSION_NODE(GatherNDImpl, GatherND); MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal); MKLDNN_EXTENSION_NODE(RangeImpl, Range); -MKLDNN_EXTENSION_NODE(SelectImpl, Select); MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree); -MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered); -MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch); -MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean); -MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN); -MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum); MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum); diff --git a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp index 0d22d29a012..337549e3434 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp @@ -11,30 +11,51 @@ #include #include #include "ie_parallel.hpp" +#include + +using namespace MKLDNNPlugin; namespace InferenceEngine { namespace Extensions { namespace Cpu { class LogSoftmaxImpl: public ExtLayerBase { -public: - explicit LogSoftmaxImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; + const auto logSoftMax = std::dynamic_pointer_cast(op); + if (!logSoftMax) { + errorMessage = "Only opset5 LogSoftmax 
operation is supported"; + return false; + } + } catch (...) { + return false; + } + return true; + } - if (layer->insData.size() != 1) - IE_THROW() << layer->name << " Incorrect number of input edges!"; +public: + explicit LogSoftmaxImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - SizeVector dims = layer->insData[0].lock()->getTensorDesc().getDims(); + errorPrefix = "LogSoftmax layer with name '" + op->get_friendly_name() + "'"; + const auto logSoftMax = std::dynamic_pointer_cast(op); + + if (op->get_input_size() != 1 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + SizeVector dims = op->get_input_shape(0); if (!dims.size()) dims = SizeVector(1, 1); - int axis = layer->GetParamAsInt("axis", -1); + int axis = logSoftMax->get_axis(); if (axis < 0) axis += dims.size(); if (dims.size() < static_cast((size_t)(1) + axis)) - IE_THROW() << layer->name << " Incorrect input parameters dimensions and axis number!"; + IE_THROW() << errorPrefix << " has incorrect input parameters dimensions and axis number!"; int j; for (j = dims.size() - 1; j >= 0; j--) { @@ -48,7 +69,8 @@ public: for (size_t i = (axis + 1); i < dims.size(); i++) reduced_axis_stride *= dims[i]; - addConfig(layer, { { ConfLayout::PLN, false, 0, Precision::FP32 } }, { { ConfLayout::PLN, false, 0, Precision::FP32 } }); + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); } @@ -103,6 +125,8 @@ private: size_t reduced_axis_stride = 1; size_t axis_step = 1; bool is_last_dim = false; + + std::string errorPrefix; }; REG_FACTORY_FOR(LogSoftmaxImpl, LogSoftmax); diff --git a/inference-engine/src/mkldnn_plugin/nodes/math.cpp b/inference-engine/src/mkldnn_plugin/nodes/math.cpp index 77b512ccced..5649ffc1ed8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/math.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/math.cpp @@ -8,87 +8,67 @@ #include #include #include + #include "ie_parallel.hpp" +#include "common/tensor_desc_creator.h" +#include "utils/general_utils.h" +#include namespace InferenceEngine { namespace Extensions { namespace Cpu { -class MathImpl: public ExtLayerBase { - static float error_function(float x) { - const float clip_bound = 2.86f; - // Points clip_bound and -clip_bound are extremums for this polynom - // So in order to provide better accuracy comparing to std::erf we have to clip input range - if (x > clip_bound) - return 1; - if (x < -clip_bound) - return -1; +using MKLDNNPlugin::TensorDescCreatorTypes; - // A polynomial approximation of the error function - const float erfNumerator[4] = { 90.0260162353515625f, 2232.00537109375f, - 7003.3251953125f, 55592.30078125f }; - const float erfDenominator[5] = { 33.56171417236328125f, 521.35797119140625f, - 4594.32373046875f, 22629.0f, 49267.39453125f }; - float polynom = 9.60497379302978515625f; - float x2 = x * x; - for (float c : erfNumerator) { - polynom = polynom * x2 + c; +class MathImpl: public ExtLayerBase { +public: + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (initializers.find(op->get_type_info()) == initializers.end()) { + errorMessage = "Unsupported Math layer type."; + return false; + } + + if (MKLDNNPlugin::one_of(op->get_type_info(), + ngraph::op::v0::HardSigmoid::type_info, + 
ngraph::op::v0::Selu::type_info)) { + auto firstConst = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1)); + auto secondConst = ngraph::as_type_ptr(op->get_input_node_shared_ptr(2)); + if (!firstConst || !secondConst) { + errorMessage = "Constant expected as the second and third inputs."; + return false; + } + } + } catch (...) { + return false; } - x *= polynom; - polynom = 1.0f; - for (float c : erfDenominator) { - polynom = polynom * x2 + c; - } - return x / polynom; + return true; } -public: - explicit MathImpl(const CNNLayer* layer) { + explicit MathImpl(const std::shared_ptr& op) : + alpha(0.f), beta(0.f), gamma(0.f) { try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - if (layer->insData.size() != 1) - IE_THROW() << layer->name << " Incorrect number of input edges!"; + initializers[op->get_type_info()](op, *this); - if (layer->insData[0].lock()->getTensorDesc().getDims() != layer->outData[0]->getTensorDesc().getDims()) - IE_THROW() << layer->name << " Incorrect number of input/output dimensions!"; - - alpha = layer->GetParamAsFloat("alpha", 0.0f); - beta = layer->GetParamAsFloat("beta", 0.0f); - gamma = layer->GetParamAsFloat("gamma", 0.0f); - - std::string math_func = layer->type; - if (math_func == "Erf") mathFunction = Math::Erf; - else if (math_func == "Abs") mathFunction = Math::Abs; - else if (math_func == "Acos") mathFunction = Math::Acos; - else if (math_func == "Acosh") mathFunction = Math::Acosh; - else if (math_func == "Asin") mathFunction = Math::Asin; - else if (math_func == "Asinh") mathFunction = Math::Asinh; - else if (math_func == "Atan") mathFunction = Math::Atan; - else if (math_func == "Atanh") mathFunction = Math::Atanh; - else if (math_func == "Ceil") mathFunction = Math::Ceil; - else if (math_func == "Ceiling") mathFunction = Math::Ceil; - else if (math_func == "Cos") mathFunction = Math::Cos; - else if (math_func == "Cosh") mathFunction = Math::Cosh; - else if (math_func == "Floor") mathFunction = Math::Floor; - else if (math_func == "HardSigmoid") mathFunction = Math::HardSigmoid; - else if (math_func == "Log") mathFunction = Math::Log; - else if (math_func == "Neg") mathFunction = Math::Neg; - else if (math_func == "Reciprocal") mathFunction = Math::Reciprocal; - else if (math_func == "Selu") mathFunction = Math::Selu; - else if (math_func == "Sign") mathFunction = Math::Sign; - else if (math_func == "Sin") mathFunction = Math::Sin; - else if (math_func == "Sinh") mathFunction = Math::Sinh; - else if (math_func == "SoftPlus") mathFunction = Math::SoftPlus; - else if (math_func == "Softsign") mathFunction = Math::Softsign; - else if (math_func == "Tan") mathFunction = Math::Tan; - else - IE_THROW() << layer->name << " Incorrect Math layer type!"; - - addConfig(layer, {DataConfigurator(ConfLayout::PLN, false, 0, Precision::FP32)}, {DataConfigurator(ConfLayout::PLN, false, 0, Precision::FP32)}); + if (MKLDNNPlugin::one_of(op->get_type_info(), + ngraph::op::v0::HardSigmoid::type_info, + ngraph::op::v0::Selu::type_info)) { + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); + } else { + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + 
{{TensorDescCreatorTypes::ncsp, Precision::FP32}}); + } } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } @@ -99,90 +79,85 @@ public: float* dst_data = outputs[0]->cbuffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - switch (mathFunction) { - case Math::Erf: - parallel_for(dataSize, [&](size_t i) { - dst_data[i] = error_function(src_data[i]); - }); - break; - case Math::Abs: + switch (getAlgorithm()) { + case MKLDNNPlugin::MathAbs: parallel_for(dataSize, [&](size_t i) { dst_data[i] = (std::abs)(src_data[i]); }); break; - case Math::Acos: + case MKLDNNPlugin::MathAcos: parallel_for(dataSize, [&](size_t i) { dst_data[i] = acosf(src_data[i]); }); break; - case Math::Acosh: + case MKLDNNPlugin::MathAcosh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = acoshf(src_data[i]); }); break; - case Math::Asin: + case MKLDNNPlugin::MathAsin: parallel_for(dataSize, [&](size_t i) { dst_data[i] = asinf(src_data[i]); }); break; - case Math::Asinh: + case MKLDNNPlugin::MathAsinh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = asinhf(src_data[i]); }); break; - case Math::Atan: + case MKLDNNPlugin::MathAtan: parallel_for(dataSize, [&](size_t i) { dst_data[i] = atanf(src_data[i]); }); break; - case Math::Atanh: + case MKLDNNPlugin::MathAtanh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = atanhf(src_data[i]); }); break; - case Math::Ceil: + case MKLDNNPlugin::MathCeiling: parallel_for(dataSize, [&](size_t i) { dst_data[i] = ceilf(src_data[i]); }); break; - case Math::Cos: + case MKLDNNPlugin::MathCos: parallel_for(dataSize, [&](size_t i) { dst_data[i] = cosf(src_data[i]); }); break; - case Math::Cosh: + case MKLDNNPlugin::MathCosh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = coshf(src_data[i]); }); break; - case Math::Floor: + case MKLDNNPlugin::MathFloor: parallel_for(dataSize, [&](size_t i) { dst_data[i] = floorf(src_data[i]); }); break; - case Math::HardSigmoid: + case MKLDNNPlugin::MathHardSigmoid: alpha = (alpha == 0.0f) ? 0.2f : alpha; beta = (beta == 0.0f) ? 0.5f : beta; parallel_for(dataSize, [&](size_t i) { dst_data[i] = (std::max)(0.f, (std::min)(1.f, alpha * src_data[i] + beta)); }); break; - case Math::Log: + case MKLDNNPlugin::MathLog: parallel_for(dataSize, [&](size_t i) { dst_data[i] = logf(src_data[i]); }); break; - case Math::Neg: + case MKLDNNPlugin::MathNegative: parallel_for(dataSize, [&](size_t i) { dst_data[i] = -src_data[i]; }); break; - case Math::Reciprocal: + case MKLDNNPlugin::MathReciprocal: parallel_for(dataSize, [&](size_t i) { dst_data[i] = 1.0f / src_data[i]; }); break; - case Math::Selu: + case MKLDNNPlugin::MathSelu: alpha = (alpha == 0.0f) ? 1.67326f : alpha; gamma = (gamma == 0.0f) ? 1.0507f : gamma; parallel_for(dataSize, [&](size_t i) { @@ -190,7 +165,7 @@ public: dst_data[i] = (x > 0.0f) ? 
(gamma * x) : (gamma * alpha * (exp(x) - 1.0f)); }); break; - case Math::Sign: + case MKLDNNPlugin::MathSign: parallel_for(dataSize, [&](size_t i) { if (src_data[i] > 0.0f) dst_data[i] = 1.0f; @@ -200,28 +175,28 @@ public: dst_data[i] = 0.0f; }); break; - case Math::Sin: + case MKLDNNPlugin::MathSin: parallel_for(dataSize, [&](size_t i) { dst_data[i] = sinf(src_data[i]); }); break; - case Math::Sinh: + case MKLDNNPlugin::MathSinh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = sinhf(src_data[i]); }); break; - case Math::SoftPlus: + case MKLDNNPlugin::MathSoftPlus: parallel_for(dataSize, [&](size_t i) { dst_data[i] = logf(expf(src_data[i]) + 1); }); break; - case Math::Softsign: + case MKLDNNPlugin::MathSoftsign: parallel_for(dataSize, [&](size_t i) { float x = src_data[i]; dst_data[i] = x / (1.f + (std::abs)(x)); }); break; - case Math::Tan: + case MKLDNNPlugin::MathTan: parallel_for(dataSize, [&](size_t i) { dst_data[i] = tanf(src_data[i]); }); @@ -237,38 +212,80 @@ public: } private: - enum class Math { - Abs, - Acos, - Acosh, - Asin, - Asinh, - Atan, - Atanh, - Ceil, - Cos, - Cosh, - Erf, - Floor, - HardSigmoid, - Log, - Neg, - Reciprocal, - Selu, - Sign, - Sin, - Sinh, - SoftPlus, - Softsign, - Tan - }; + static std::map&, MathImpl& node)>> initializers; - Math mathFunction = Math::Erf; float alpha = 0.0f; float beta = 0.0f; float gamma = 0.0f; }; +std::map&, MathImpl& node)>> MathImpl::initializers = { + {ngraph::op::v0::Abs::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathAbs; + }}, + {ngraph::op::v0::Acos::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathAcos; + }}, + {ngraph::op::v3::Acosh::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathAcosh; + }}, + {ngraph::op::v0::Asin::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathAsin; + }}, + {ngraph::op::v3::Asinh::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathAsinh; + }}, + {ngraph::op::v0::Atan::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathAtan; + }}, + {ngraph::op::v0::Ceiling::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathCeiling; + }}, + {ngraph::op::v0::Cos::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathCos; + }}, + {ngraph::op::v0::Cosh::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathCosh; + }}, + {ngraph::op::v0::Floor::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathFloor; + }}, + {ngraph::op::v0::HardSigmoid::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathHardSigmoid; + node.alpha = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1))->cast_vector()[0]; + node.beta = ngraph::as_type_ptr(op->get_input_node_shared_ptr(2))->cast_vector()[0]; + }}, + {ngraph::op::v0::Log::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathLog; + }}, + {ngraph::op::v0::Negative::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathNegative; + }}, + {ngraph::op::v0::Selu::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathSelu; + node.alpha = 
ngraph::as_type_ptr(op->get_input_node_shared_ptr(1))->cast_vector()[0]; + node.gamma = ngraph::as_type_ptr(op->get_input_node_shared_ptr(2))->cast_vector()[0]; + }}, + {ngraph::op::v0::Sign::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathSign; + }}, + {ngraph::op::v0::Sin::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathSin; + }}, + {ngraph::op::v0::Sinh::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathSinh; + }}, + {ngraph::op::v4::SoftPlus::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathSoftPlus; + }}, + {ngraph::op::v0::Tan::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathTan; + }}, + {ngraph::op::v3::Atanh::type_info, [](const std::shared_ptr& op, MathImpl& node) { + node.algorithm = MKLDNNPlugin::MathAtanh; + }} +}; + REG_FACTORY_FOR(MathImpl, Abs); REG_FACTORY_FOR(MathImpl, Acos); REG_FACTORY_FOR(MathImpl, Acosh); @@ -280,7 +297,6 @@ REG_FACTORY_FOR(MathImpl, Ceil); REG_FACTORY_FOR(MathImpl, Ceiling); REG_FACTORY_FOR(MathImpl, Cos); REG_FACTORY_FOR(MathImpl, Cosh); -REG_FACTORY_FOR(MathImpl, Erf); REG_FACTORY_FOR(MathImpl, Floor); REG_FACTORY_FOR(MathImpl, HardSigmoid); REG_FACTORY_FOR(MathImpl, Log); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp new file mode 100644 index 00000000000..e2616f43c99 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp @@ -0,0 +1,237 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "utils/bfloat16.hpp" +#include +#include "mkldnn_batch_to_space_node.h" +#include +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNBatchToSpaceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto batchToSpace = std::dynamic_pointer_cast(op); + if (!batchToSpace) { + errorMessage = "Only opset2 BatchToSpace operation is supported"; + return false; + } + if (std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1)) == nullptr || + std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2)) == nullptr || + std::dynamic_pointer_cast(op->get_input_node_shared_ptr(3)) == nullptr) { + errorMessage = "Only constant 'block_shape', 'crops_begin', 'crops_end' are supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNBatchToSpaceNode::MKLDNNBatchToSpaceNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "BatchToSpace layer with name '" + op->get_friendly_name() + "'"; + + if (op->get_input_size() != 4 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input or output edges!"; + + inDims = op->get_input_shape(0); + outDims = op->get_output_shape(0); + if (inDims.size() < 4 || inDims.size() > 5) + IE_THROW() << errorPrefix << " has unsupported 'data' input rank: " << inDims.size(); + if (inDims.size() != outDims.size()) + IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions"; + + blockShapeIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1))->cast_vector(); + cropsBeginIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2))->cast_vector(); +} + +void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + const auto precision = getOriginalInputPrecisionAtPort(0); + const std::set supported_precision_sizes = {1, 2, 4, 8}; + if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) + IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); + + addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::nspc, precision}}, + impl_desc_type::ref_any); + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::ncsp, precision}}, + impl_desc_type::ref_any); + if (inDims[1] % 8 == 0) { + addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::nCsp8c, precision}}, + impl_desc_type::ref_any); + } + if (inDims[1] % 16 == 0) { + addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::nCsp16c, precision}}, + impl_desc_type::ref_any); + } +} + +static std::vector getShape5D(const SizeVector &shape) { + std::vector shape5D(5, 1); + for (int i = 0; i < 2; i++) { + shape5D[i] = shape[i]; + shape5D[4 - i] = shape[shape.size() - 1 - i]; + } + shape5D[2] = shape.size() == 5 ? 
shape[2] : shape5D[2]; + return shape5D; +} + +template +void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + const auto layout = getParentEdgeAt(0)->getDesc().getLayout(); + const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; + const auto dimsSize = inDims.size(); + + auto inShape5D = getShape5D(inDims); + auto outShape5D = getShape5D(outDims); + auto blockShape = getShape5D(blockShapeIn); + + if (layout == NHWC || layout == NDHWC) { + inShape5D.push_back(inShape5D[1]); + inShape5D.erase(inShape5D.begin() + 1); + outShape5D.push_back(outShape5D[1]); + outShape5D.erase(outShape5D.begin() + 1); + blockShape.push_back(blockShape[1]); + blockShape.erase(blockShape.begin() + 1); + } + + const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu; + const size_t blockCountInput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + const size_t blockCountOutput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + const auto blockRemainder = inShape5D[1] % blockSize; + const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; + + const size_t inSpatialStep = inShape5D[2] * inShape5D[3] * inShape5D[4]; + const size_t inBatchStep = (blocked ? blockSize * blockCountInput : inShape5D[1]) * inSpatialStep; + + const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4]; + const size_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep; + + size_t channels = (inShape5D[1] / blockSize); + channels = channels == 0 ? 1 : channels; + const size_t workAmount = inShape5D[0] * channels; + + parallel_nt(0, [&](const int ithr, const int nthr) { + size_t start(0lu), end(0lu); + splitter(workAmount, nthr, ithr, start, end); + std::vector indxStart(2, 0); + std::vector indxEnd(2, 0); + parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels); + parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels); + std::vector oAdd(5, 1); + std::vector begin(5, 0); + std::vector finish(5, 1); + for (size_t i0 = indxStart[0]; i0 < indxEnd[0] + 1; ++i0) { + int64_t bIdx = i0 / outShape5D[0]; + const size_t srcIdx0 = i0 * inBatchStep; + const size_t dstIdx0 = (i0 - (bIdx * outShape5D[0])) * outBatchStep; + oAdd[4] = bIdx % blockShapeIn[dimsSize - 1] - cropsBeginIn[dimsSize - 1]; + bIdx /= blockShapeIn[dimsSize - 1]; + oAdd[3] = bIdx % blockShapeIn[dimsSize - 2] - cropsBeginIn[dimsSize - 2]; + bIdx /= blockShapeIn[dimsSize - 2]; + oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - cropsBeginIn[2] : 0lu; + bIdx = dimsSize == 5 ? 
bIdx / blockShapeIn[2] : bIdx; + oAdd[1] = bIdx % blockShapeIn[1] - cropsBeginIn[1]; + if (layout == NHWC || layout == NDHWC) { + oAdd.push_back(oAdd[1]); + oAdd.erase(oAdd.begin() + 1); + } + begin[1] = (blockShape[1] - 1 - oAdd[1]) / blockShape[1] / blockSize; + finish[1] = (outShape5D[1] - 1 - oAdd[1]) / blockShape[1] / blockSize; + begin[2] = (blockShape[2] - 1 - oAdd[2]) / blockShape[2]; + finish[2] = (outShape5D[2] - 1 - oAdd[2]) / blockShape[2]; + begin[3] = (blockShape[3] - 1 - oAdd[3]) / blockShape[3]; + finish[3] = (outShape5D[3] - 1 - oAdd[3]) / blockShape[3]; + begin[4] = (blockShape[4] - 1 - oAdd[4]) / blockShape[4]; + finish[4] = (outShape5D[4] - 1 - oAdd[4]) / blockShape[4]; + const int64_t addTmpOC = blocked ? 0lu : oAdd[1]; + const int64_t addTmpOc = blocked ? oAdd[1] : 0lu; + indxStart[1] = begin[1] > indxStart[1] ? begin[1] : indxStart[1]; + const size_t lastI1 = i0 == indxEnd[0] ? (indxEnd[1] > finish[1] ? finish[1] : indxEnd[1]) : finish[1]; + for (; indxStart[1] < lastI1 + 1; ++indxStart[1]) { + const size_t block = indxStart[1] == finish[1] ? lastBlock : blockSize; + const int64_t tmpOC = indxStart[1] * blockShape[1] + addTmpOC; + const size_t srcIdx1 = srcIdx0 + indxStart[1] * inSpatialStep * blockSize; + const size_t dstIdx1 = dstIdx0 + tmpOC * outSpatialStep * blockSize; + const size_t itEnd = blocked ? ((block - 1) * blockShape[1] + oAdd[1]) / blockSize : 0lu; + for (size_t i2 = begin[2]; i2 < finish[2] + 1; ++i2) { + const int64_t tmpOd = i2 * blockShape[2] + oAdd[2]; + const size_t srcIdx2 = srcIdx1 + i2 * inShape5D[3] * inShape5D[4] * blockSize; + const size_t dstIdx2 = dstIdx1 + tmpOd * outShape5D[3] * outShape5D[4] * blockSize; + for (size_t i3 = begin[3]; i3 < finish[3] + 1; ++i3) { + const int64_t tmpOh = i3 * blockShape[3] + oAdd[3]; + const size_t srcIdx3 = srcIdx2 + i3 * inShape5D[4] * blockSize; + const size_t dstIdx3 = dstIdx2 + tmpOh * outShape5D[4] * blockSize; + for (size_t i4 = begin[4]; i4 < finish[4] + 1; ++i4) { + const int64_t tmpOw = i4 * blockShape[4] + oAdd[4]; + const size_t srcIdx4 = srcIdx3 + i4 * blockSize; + const size_t dstIdx4 = dstIdx3 + tmpOw * blockSize; + for (size_t it = 0; it < itEnd + 1; ++it) { + const size_t i5Begin = it == 0 ? 0 : (it * blockSize - 1 - oAdd[1]) / blockShape[1] + 1; + const size_t i5End = it == itEnd ? 
(block - 1) : ((it + 1) * blockSize - 1 - oAdd[1]) / blockShape[1]; + for (size_t i5 = i5Begin; i5 < i5End + 1; ++i5) { + const int64_t tmpOc = i5 * blockShape[1] + addTmpOc; + const size_t srcIdx5 = srcIdx4 + i5; + const size_t dstIdx5 = + dstIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize); + dstData[dstIdx5] = srcData[srcIdx5]; + } + } + } + } + } + } + indxStart[1] = 0lu; + } + }); +} + +void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) { + switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) { + case 1: batchToSpaceKernel::value_type>(); break; + case 2: batchToSpaceKernel::value_type>(); break; + case 4: batchToSpaceKernel::value_type>(); break; + default: + IE_THROW() << "BatchToSpace layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'"; + } +} + +bool MKLDNNBatchToSpaceNode::created() const { + return getType() == BatchToSpace; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNBatchToSpaceNode, BatchToSpace) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h new file mode 100644 index 00000000000..a7044d13218 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNBatchToSpaceNode : public MKLDNNNode { +public: + MKLDNNBatchToSpaceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNBatchToSpaceNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + InferenceEngine::SizeVector inDims; + InferenceEngine::SizeVector outDims; + std::vector blockShapeIn; + std::vector cropsBeginIn; + + std::string errorPrefix; + + template + void batchToSpaceKernel(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp deleted file mode 100644 index b885f8c0c15..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "mkldnn_batchnorm_node.h" -#include -#include "common/cpu_memcpy.h" - -using namespace mkldnn; -using namespace MKLDNNPlugin; -using namespace InferenceEngine; - -MKLDNNBatchNormalizationNode::MKLDNNBatchNormalizationNode(const InferenceEngine::CNNLayerPtr& layer, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return GetVarianceDesc(primitive_desc_it); - }); - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return GetMeanDesc(primitive_desc_it); - }); - - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - if (!fusedWithScale()) - return 
MKLDNNMemoryDesc(); - return GetScaleShiftWeightsDesc(primitive_desc_it); - }); -} - -void MKLDNNBatchNormalizationNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - auto * bnLayer = dynamic_cast(getCnnLayer().get()); - if (bnLayer == nullptr) - IE_THROW() << "Cannot convert batch normalization layer."; - if (bnLayer->_weights == nullptr || bnLayer->_biases == nullptr) { - IE_THROW() << "Weights/biases are empty for layer: " << bnLayer->name - << " used in MKLDNN node: " << getName() << "\n" - << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" - << " to load them from .bin part of the IR"; - } - - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (!getChildEdges().size()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - eps = bnLayer->epsilon; - - size_t variancesSize = MKLDNNDims(bnLayer->_weights->getTensorDesc().getDims()).size(); - size_t meansSize = MKLDNNDims(bnLayer->_biases->getTensorDesc().getDims()).size(); - - if (variancesSize != meansSize && variancesSize != 1) - IE_THROW() << "Incorrect weights and biases sizes!"; - - internalBlobs.push_back(createInternalBlob(bnLayer->_weights->getTensorDesc().getDims(), true)); - internalBlobs.push_back(createInternalBlob(bnLayer->_biases->getTensorDesc().getDims(), false)); - - auto parentOutDims = getParentEdgeAt(0)->getDims(); - - if (fusedWith.size() > 1) - IE_THROW() << "BatchNorm fusion is possible with only one layer!"; - - for (const auto &node : fusedWith) { - auto * scshLayer = dynamic_cast(node->getCnnLayer().get()); - if (scshLayer == nullptr) - IE_THROW() << "Cannot cast to the ScaleShift layer to fuse with BatchNorm."; - - size_t C = static_cast(getChildEdgeAt(0)->getDims()[1]); - SizeVector mkldnn_weights = {2, C}; - TensorDesc desc(scshLayer->_weights->getTensorDesc().getPrecision(), mkldnn_weights, InferenceEngine::NC); - InferenceEngine::TBlob::Ptr internalBlob = InferenceEngine::make_shared_blob(desc); - internalBlob->allocate(); - float * data = internalBlob->buffer(); - if (data == nullptr) - IE_THROW() << "Cannot get memory!"; - - InferenceEngine::Blob::Ptr blb = scshLayer->_weights; - if (blb == nullptr) - IE_THROW() << "Cannot get weights blob for node " << getName() << "."; - - size_t weightsByteSize = blb->byteSize(); - cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize); - data += blb->size(); - blb = scshLayer->_biases; - - if (blb == nullptr) { - memset(data, 0, weightsByteSize); - } else { - if (weightsByteSize != blb->byteSize()) - IE_THROW() << "ScaleShift has incorrect weights!"; - cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize); - } - internalBlobs.push_back(internalBlob); - } - - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (precision != InferenceEngine::Precision::FP32) - precision = InferenceEngine::Precision::FP32; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - - for (auto format : getAvailableFormatsForDims(parentOutDims)) { - MKLDNNMemoryDesc in_candidate(parentOutDims, inputDataType, format); - createDescriptor({in_candidate}, {}); - } -} - -static MKLDNNMemoryDesc get_bn_mdesc_by_index(const mkldnn::primitive_desc_iterator &primitive_desc, int idx) { - mkldnn_batch_normalization_desc_t *p; - error::wrap_c_api(mkldnn_primitive_desc_query( - primitive_desc.get(), mkldnn::convert_to_c(mkldnn::query::batch_normalization_d), 0, &p), - "could 
not get a batch-normalization descriptor"); - auto bndesc = - (p->flags & mkldnn::convert_to_c(mkldnn::normalization_flags::use_global_stats)) ? - primitive_desc.src_desc(idx) : primitive_desc.dst_desc(idx); - - return MKLDNNMemoryDesc {bndesc}; -} - -MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const mkldnn::primitive_desc &primitive_desc) const { - // TODO: rewrite with using stat_desc - return get_bn_mdesc_by_index(primitive_desc, 2); -} - -MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const mkldnn::primitive_desc &primitive_desc) const { - return get_bn_mdesc_by_index(primitive_desc, 1); -} - -MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const mkldnn::primitive_desc &primitive_desc) const { - return MKLDNNMemoryDesc(primitive_desc.weights_desc(0)); -} - -bool MKLDNNBatchNormalizationNode::created() const { - return getType() == BatchNormalization; -} - -void MKLDNNBatchNormalizationNode::createPrimitive() { - if (prim) - return; - - auto prim_desc = createPrimitiveDescriptor(); - prim.reset(new batch_normalization_forward(prim_desc)); - - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - - const auto &mean = internalBlobMemory[1]->GetPrimitive(); - const auto &var = internalBlobMemory[0]->GetPrimitive(); - - if (convert_to_c(flag) & dnnl_use_scaleshift) { - const auto &sclshft = internalBlobMemory[2]->GetPrimitive(); - primArgs = {{DNNL_ARG_SRC, src}, - {DNNL_ARG_MEAN, mean}, - {DNNL_ARG_VARIANCE, var}, - {DNNL_ARG_SCALE_SHIFT, sclshft}, - {DNNL_ARG_DST, dst}}; - } else { - primArgs = {{DNNL_ARG_SRC, src}, - {DNNL_ARG_MEAN, mean}, - {DNNL_ARG_VARIANCE, var}, - {DNNL_ARG_DST, dst}}; - } -} - -void MKLDNNBatchNormalizationNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc inDesc(inputDesc[0]); - if (inDesc.getDims().ndims() == 2) { - // Make it 4D - MKLDNNDims dims = inDesc.getDims(); - dims.push_back(1); // H - dims.push_back(1); // W - auto format = memory::format_tag::nchw; - inDesc = MKLDNNMemoryDesc(dims, inDesc.getDataType(), format); - } - - flag = normalization_flags::use_global_stats; - if (fusedWithScale()) - flag |= normalization_flags::use_scale_shift; - - MKLDNNDescriptor desc(std::shared_ptr( - new mkldnn::batch_normalization_forward::desc(prop_kind::forward_scoring, inDesc, eps, - flag))); - descs.push_back(desc); -} - -void MKLDNNBatchNormalizationNode::initOptimalPrimitiveDescriptor() { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; - - if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || (!isUninitTensorDesc(config.inConfs[0].desc) && - !isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc)) - IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; - - if (!isUninitTensorDesc(config.inConfs[0].desc)) { - config.outConfs[0].desc = config.inConfs[0].desc; - } else if (!isUninitTensorDesc(config.outConfs[0].desc)) { - config.inConfs[0].desc = config.outConfs[0].desc; - } else { - config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0); - } - - initDescriptor(config); -} - -void MKLDNNBatchNormalizationNode::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) 
- return; - - // BN primitive doesn't support strides - for (auto& desc : descs) { - primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine()); - while (static_cast(itpd)) { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = true; - for (size_t i = 0; i < desc.inputNumbers(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - dataConfig.desc = getSrcMemDesc(itpd, i); - config.inConfs.push_back(dataConfig); - } - - for (size_t i = 0; i < desc.outputNumbers(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = canBeInPlace() ? 0 : -1; - dataConfig.constant = false; - dataConfig.desc = getDstMemDesc(itpd, i); - config.outConfs.push_back(dataConfig); - } - impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); - - supportedPrimitiveDescriptors.emplace_back(config, impl_type); - if (!itpd.next_impl()) - break; - } - } -} - -MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, - size_t idx) { - TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - - if (getParentEdgeAt(0)->getDims().ndims() == 2 && desc.getLayout() == InferenceEngine::Layout::NCHW) { - desc.reshape(getParentEdgeAt(idx)->getDims().ToSizeVector(), InferenceEngine::Layout::NC); - return MKLDNNMemoryDesc(desc); - } - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); -} - -MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, - size_t idx) { - TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); - - if (getParentEdgeAt(0)->getDims().ndims() == 2 && desc.getLayout() == InferenceEngine::Layout::NCHW) { - desc.reshape(getParentEdgeAt(idx)->getDims().ToSizeVector(), InferenceEngine::Layout::NC); - return MKLDNNMemoryDesc(desc); - } - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); -} - -REG_MKLDNN_PRIM_FOR(MKLDNNBatchNormalizationNode, BatchNormalization); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h deleted file mode 100644 index 46d79425fb2..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include - -namespace MKLDNNPlugin { - -class MKLDNNBatchNormalizationNode : public MKLDNNNode { -public: - MKLDNNBatchNormalizationNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache); - - ~MKLDNNBatchNormalizationNode() override = default; - void initSupportedPrimitiveDescriptors() override; - void initOptimalPrimitiveDescriptor() override; - void getSupportedDescriptors() override; - void 
createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; - void createPrimitive() override; - bool created() const override; - - bool fusedWithScale() const {return fusedWith.size() == 1 && fusedWith[0]->getType() == Eltwise - && fusedWith[0]->getCnnLayer()->type == "ScaleShift";} - - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - -private: - mkldnn::normalization_flags flag = mkldnn::normalization_flags::none; - float eps = 0.0f; - MKLDNNMemoryDesc GetVarianceDesc(const mkldnn::primitive_desc& primitive_desc) const; - MKLDNNMemoryDesc GetMeanDesc(const mkldnn::primitive_desc& primitive_desc) const; - MKLDNNMemoryDesc GetScaleShiftWeightsDesc(const mkldnn::primitive_desc& primitive_desc) const; -}; - -} // namespace MKLDNNPlugin - diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 1738d1798a9..517066d6f32 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -6,20 +6,19 @@ #include "mkldnn_reorder_node.h" #include "mkldnn_input_node.h" #include "mkldnn_eltwise_node.h" -#include "mkldnn_quantize_node.h" +#include "mkldnn_fake_quantize_node.h" #include "mkldnn_conv_node.h" -#include #include #include #include #include -#include #include "ie_parallel.hpp" #include "cpu/x64/jit_generator.hpp" #include "cpu/x64/jit_uni_eltwise_injector.hpp" #include "cpu/x64/jit_uni_depthwise_injector.hpp" #include "cpu/x64/cpu_isa_traits.hpp" #include "utils/general_utils.h" +#include // WA for xbyak.h #ifdef _WIN32 @@ -873,17 +872,52 @@ private: } }; -MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, +bool MKLDNNBinaryConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto binConv = std::dynamic_pointer_cast(op); + if (!binConv) { + errorMessage = "Only opset1 BinaryConvolution operation is supported"; + return false; + } + if (binConv->get_mode() != ngraph::op::v1::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT) { + errorMessage = "Doesn't support mode: " + ngraph::as_string(binConv->get_mode()); + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) { - if (mayiuse(x64::avx512_common)) { - implType = impl_desc_type::jit_avx512; - } else if (mayiuse(x64::avx2)) { - implType = impl_desc_type::jit_avx2; - } else if (mayiuse(x64::sse41)) { - implType = impl_desc_type::jit_sse42; + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "BinaryConvolution node with name '" + getName() + "' "; + const auto binConv = std::dynamic_pointer_cast(op); + + pad_value = binConv->get_pad_value(); + for (int i = 0; i < binConv->get_strides().size(); i++) { + stride.push_back(static_cast(binConv->get_strides()[i])); + } + for (int i = 0; i < binConv->get_dilations().size(); i++) { + dilation.push_back(static_cast(binConv->get_dilations()[i]) - 1); + } + paddingL = binConv->get_pads_begin(); + paddingR = binConv->get_pads_end(); + + if (mayiuse(x64::avx512_common)) { + implType = impl_desc_type::jit_avx512; + } else if (mayiuse(x64::avx2)) { + implType = impl_desc_type::jit_avx2; + } else if (mayiuse(x64::sse41)) { + implType = impl_desc_type::jit_sse42; + } else { + implType = impl_desc_type::ref; + } } else { - implType = impl_desc_type::ref; + IE_THROW(NotImplemented) << errorMessage; } } @@ -891,28 +925,17 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() { if (!descs.empty()) return; - auto* binConvLayer = dynamic_cast(getCnnLayer().get()); - if (binConvLayer == nullptr) - IE_THROW() << "Cannot convert convolution layer."; - - std::string errorPrefix = "BinaryConvolution layer with name '" + getName() + "' "; - - withBinarization = isFusedWith(Quantize); + withBinarization = isFusedWith(FakeQuantize); withSum = false; int expectedInputEdgesNum = 2; for (int i = 0; i < fusedWith.size(); i++) { auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); - if (eltwiseNode && eltwiseNode->isSum()) { + if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { withSum = true; expectedInputEdgesNum++; } } - group = binConvLayer->_group; - if (group != 1) { - IE_THROW() << errorPrefix << "doesn't support parameter group != 1"; - } - if (getParentEdges().size() != expectedInputEdgesNum) IE_THROW() << errorPrefix << "has incorrect number of input edges"; @@ -930,21 +953,6 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() { if (getChildEdgeAt(0)->getDims().ndims() != 4) { IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); } - - if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) { - IE_THROW() << "Convolution layer. Unsupported mode. 
Only 4D and 5D blobs are supported as input."; - } - - pad_value = binConvLayer->_pad_value; - - invertVectorCopyUtoI(binConvLayer->_stride, stride); - for (int i = 1; i <= binConvLayer->_dilation.size(); i++) { - dilation.push_back(static_cast(binConvLayer->_dilation[binConvLayer->_dilation.size() - i]) - 1); - } - - auto allPads = getPaddings(*binConvLayer); - invertVectorCopyUtoI(allPads.begin, paddingL); - invertVectorCopyUtoI(allPads.end, paddingR); } void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { @@ -1077,48 +1085,18 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { } bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { - auto isOneOf = [](EltwiseOpType alg, std::vector algs) { - for (auto a : algs) { - if (alg == a) { - return true; - } - } - return false; - }; - if (implType == impl_desc_type::ref) return false; // Binarization have to be last operation in fusing chain - if (isFusedWith(Quantize)) + if (isFusedWith(FakeQuantize)) return false; - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize node " << node->getName(); - return quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << node->getName(); - - // Only one Add operation can be fused since it is implemented via output blob reuse - if (eltwiseNode->isSum()) { - for (auto& fusedNode : fusedWith) { - auto* fusedEltwiseNode = dynamic_cast(fusedNode.get()); - if (fusedEltwiseNode->isSum()) { - return false; - } - } - } - - return eltwiseNode->isSum() || - isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, SoftRelu, - Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}); + if (node->getType() == FakeQuantize) { + return node->getAlgorithm() == FQBinarization; + } else { + return canFuseSimpleOperation(node); } - - return false; } void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { @@ -1127,16 +1105,16 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { for (auto &node : fusedWith) { auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { - if (eltwiseNode->isSum()) + if (eltwiseNode->isSpecialConvolutionAddFusing()) ops.append_sum(1.0); else eltwiseNode->appendPostOps(ops); continue; } - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode) { - quantizeNode->appendPostOps(ops); + auto* fakeQuantizeNode = dynamic_cast(node.get()); + if (fakeQuantizeNode) { + fakeQuantizeNode->appendPostOps(ops); continue; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h index 60892aae208..b8a93caf4c5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h @@ -74,7 +74,7 @@ struct jit_uni_bin_conv_kernel { class MKLDNNBinaryConvolutionNode : public MKLDNNNode { public: - MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNBinaryConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNBinaryConvolutionNode() override = default; void getSupportedDescriptors() override; @@ -86,7 +86,11 @@ 
public: return false; } void setPostOps(mkldnn::primitive_attr &attr); - bool canFuse(const MKLDNNNodePtr& node) const; + bool canFuse(const MKLDNNNodePtr& node) const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + + impl_desc_type getImplType() { return implType; } private: bool withSum = false; @@ -112,6 +116,8 @@ private: const std::vector& s_str, const std::vector& w_str, const std::vector& d_str); void executeReference(const uint8_t* src, const uint8_t* weights, uint8_t* dst, const std::vector& s_str, const std::vector& w_str, const std::vector& d_str); + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp new file mode 100644 index 00000000000..3d9815d48c1 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp @@ -0,0 +1,133 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "utils/bfloat16.hpp" +#include +#include "mkldnn_broadcast_node.h" +#include +#include +#include "common/cpu_memcpy.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto broadcast = std::dynamic_pointer_cast(op); + if (!broadcast) { + errorMessage = "Only opset1 Broadcast operation is supported"; + return false; + } + if (broadcast->get_broadcast_spec() != ngraph::op::AutoBroadcastSpec::NUMPY) { + errorMessage = "Only NUMPY broadcast type is supported"; + return false; + } + if (std::dynamic_pointer_cast(broadcast->get_input_node_shared_ptr(BROADCAST_SHAPE)) == nullptr) { + errorMessage = "Only const 'shape' input is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "Broadcast node with name '" + op->get_friendly_name() + "'"; + if (op->get_input_size() != 2 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + SizeVector shape_dims = op->get_input_shape(BROADCAST_SHAPE); + if (shape_dims.size() > 1) + IE_THROW() << errorPrefix << " has incorrect 'shape' input rank: " << shape_dims.size(); +} + +void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + Precision prec = getOriginalInputPrecisionAtPort(BROADCAST_INPUT); + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, prec}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{TensorDescCreatorTypes::ncsp, prec}}, + impl_desc_type::ref_any); +} + +void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { + size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getDesc().getDims())[0]; + SizeVector dst_dims = getChildEdgeAt(0)->getDesc().getDims(); + SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getDims(); + SizeVector srcStrides = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getBlockingDesc().getStrides(); + size_t data_size = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getPrecision().size(); + + if (!src_dims.size()) + src_dims = SizeVector(1, 1); + if (!srcStrides.size()) + srcStrides = SizeVector(1, 1); + + if (dst_dims.size() != shape_size) { + IE_THROW() << "Output tensor dimension mismatch"; + } + + if (src_dims.size() > dst_dims.size()) { + IE_THROW() << "Output tensor dimension is smaller then input tensor dimension"; + } + + InferenceEngine::SizeVector dstStrides = getChildEdgeAt(0)->getDesc().getBlockingDesc().getStrides(); + InferenceEngine::SizeVector src_aligned(dst_dims.size()); + InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); + size_t prefix_size = dst_dims.size() - src_dims.size(); + for (size_t i = 0; i < dst_dims.size(); i++) { + if (i < prefix_size) { + src_aligned[i] = 1; + srcStrides_aligned[i] = srcStrides[0]; + } else { + src_aligned[i] = src_dims[i - prefix_size]; + srcStrides_aligned[i] = srcStrides[i - prefix_size]; + } + } + + size_t work_amount_dst = dstStrides[0] * dst_dims[0]; + const auto *src_data = reinterpret_cast(getParentEdgeAt(BROADCAST_INPUT)->getMemoryPtr()->GetPtr()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + parallel_nt(0, [&](const int ithr, const int nthr) { + size_t i, src_idx, start = 0, end = 0; + SizeVector counters(dst_dims.size(), 0); + splitter(work_amount_dst, nthr, ithr, start, end); + for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) { + counters[j] = i % dst_dims[j]; + i /= dst_dims[j]; + } + for (size_t iwork = start * data_size; iwork < end * data_size; iwork += data_size) { + for (i = 0, src_idx = 0; i < dst_dims.size(); ++i) + src_idx += counters[i] ? 
((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0; + + cpu_memcpy(&dst_data[iwork], &src_data[src_idx * data_size], data_size); + + for (int j = dst_dims.size() - 1; j >= 0; j--) { + counters[j] = (counters[j] + 1) % dst_dims[j]; + if (counters[j] != 0) break; + } + } + }); +} + +bool MKLDNNBroadcastNode::created() const { + return getType() == Broadcast; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNBroadcastNode, Broadcast) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.h new file mode 100644 index 00000000000..e9d9315b033 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.h @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNBroadcastNode : public MKLDNNNode { +public: + MKLDNNBroadcastNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNBroadcastNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + static const size_t BROADCAST_INPUT = 0; + static const size_t BROADCAST_SHAPE = 1; + + std::string errorPrefix; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp index 63d51589ad4..aa9d7b8fd98 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp @@ -9,7 +9,6 @@ #include #include -#include #include "mkldnn.hpp" #include "mkldnn/iml_type_mapper.h" #include "mkldnn_dims.h" @@ -17,7 +16,7 @@ #include "mkldnn_memory.h" #include "ie_parallel.hpp" #include "mkldnn_conv_node.h" -#include "mkldnn_quantize_node.h" +#include "mkldnn_fake_quantize_node.h" #include "mkldnn_pooling_node.h" #include "mkldnn_eltwise_node.h" #include @@ -27,21 +26,37 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNConcatNode::MKLDNNConcatNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} + +bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto concatOp = ngraph::as_type_ptr(op); + if (!concatOp) { + errorMessage = "Node is not an instance of the Concat operation."; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + auto concatOp = ngraph::as_type_ptr(op); + auto axis = concatOp->get_axis(); + if (axis < 0) { + this->axis = concatOp->get_input_shape(0).size() + axis; + } else { + this->axis = axis; + } +} void MKLDNNConcatNode::getSupportedDescriptors() { - auto * conLayer = dynamic_cast(getCnnLayer().get()); - - if (conLayer == nullptr) - IE_THROW() << "Cannot convert concat layer."; - - axis = conLayer->_axis; - - if (getParentEdges().empty()) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); auto& firstParentDims = getParentEdgeAt(0)->getDims(); for (size_t i = 1; i < getParentEdges().size(); i++) { auto& dims = getParentEdgeAt(i)->getDims(); @@ -64,10 +79,11 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); + auto& originInputPrecisions = getOriginalInputPrecisions(); + inputPrecision = originInputPrecisions[0]; bool isMixedPrecision = false; - for (int i = 1; i < getCnnLayer()->insData.size(); i++) { - if (getCnnLayer()->insData[0].lock()->getPrecision() != getCnnLayer()->insData[i].lock()->getPrecision()) { + for (int i = 1; i < getOriginalInputsNumber(); i++) { + if (originInputPrecisions[0] != originInputPrecisions[i]) { isMixedPrecision = true; break; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h index d337232a592..ffb8ce22feb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h @@ -13,9 +13,10 @@ namespace MKLDNNPlugin { class MKLDNNConcatNode : public MKLDNNNode { public: - MKLDNNConcatNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNConcatNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNConcatNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void initOptimalPrimitiveDescriptor() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index d0411382b2a..1c3da9e97cc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -6,97 +6,123 @@ #include "mkldnn_reorder_node.h" #include "mkldnn_input_node.h" #include "mkldnn_eltwise_node.h" -#include "mkldnn_quantize_node.h" +#include "mkldnn_fake_quantize_node.h" #include "mkldnn_pooling_node.h" #include "mkldnn_concat_node.h" -#include #include #include #include #include -#include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNConvolutionNode::MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, 
MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache), withBiases(false), withSum(false), withDWConv(false), isDW(false), isMerged(false), +bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!ngraph::is_type(op) && !ngraph::is_type(op)) { + errorMessage = "Only opset1 Convolution and GroupConvolution operations are supported"; + return false; + } + size_t ndims = op->get_input_shape(0).size(); + if ((ndims < 4) || (ndims > 5)) { + errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(ndims); + return false; + } + } catch (...) { + return false; + } + + return true; +} + +MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false), isGrouped(false), dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef), - groupNum(1lu), baseInputsNumber(1), eltwisePrecision(Precision::FP32) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); - }); - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - if (!withBiases) - return MKLDNNMemoryDesc(); - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1)); - }); - - auto ws = layer->blobs.find("w-scale"); - if (ws != layer->blobs.end()) { - wScale = ws->second; + groupNum(1lu), eltwisePrecision(Precision::FP32) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; } - // Trying to find oi-scale - if (getCnnLayer()->type == "Convolution" && getCnnLayer()->precision == Precision::I8) { - auto ois = layer->blobs.find("oi-scale"); - if ((getCnnLayer()->outData[0]->getPrecision() == Precision::I8 || getCnnLayer()->outData[0]->getPrecision() == Precision::U8) - && ois == layer->blobs.end()) { - IE_THROW() << "Internal error of graph quantization - mismatch of intermediate scales and next layer type for convolution " - << getCnnLayer()->name; - } - if (ois != layer->blobs.end()) { - // If we can find an oi-scale, then the next layer has to be an INT8. 
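A short aside on the legacy INT8 scale blobs being deleted here: the removed constructor reads per-channel "w-scale" and, when the next layer is also INT8, "oi-scale" blobs, and the removed addScaleToPrimitiveAttr/setPostOps code further down combines them into per-channel output scales (wScale / oiScale when an oi-scale exists, plain wScale otherwise). The following standalone sketch only illustrates that combination; the names and types are hypothetical and are not the plugin's API.

#include <cstddef>
#include <iostream>
#include <vector>

// Standalone sketch (not plugin code): combine the legacy per-channel
// "w-scale" blob with an optional "oi-scale" blob into output scales.
static std::vector<float> makeOutputScales(const std::vector<float>& wScale,
                                           const std::vector<float>* oiScale) {
    std::vector<float> scales(wScale.size());
    for (std::size_t c = 0; c < wScale.size(); ++c)
        scales[c] = oiScale ? wScale[c] / (*oiScale)[c] : wScale[c];
    return scales;
}

int main() {
    std::vector<float> w{2.0f, 4.0f};
    std::vector<float> oi{0.5f, 2.0f};
    for (float s : makeOutputScales(w, &oi))
        std::cout << s << "\n";  // prints 4 and 2
    return 0;
}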
- oScale = ois->second; - } - } + isPrimitivesPriorityDefined = op->get_rt_info().count("PrimitivesPriority") != 0; - if (getCnnLayer()->type == "Convolution") { - baseInputsNumber = getCnnLayer().get()->insData.size(); + auto convolutionOp = ngraph::as_type_ptr(op); + auto groupConvolutionOp = ngraph::as_type_ptr(op); + + if (convolutionOp) { + algorithm = ConvolutionCommon; + + groupNum = 1; + isGrouped = false; + + weightDims = convolutionOp->input_value(1).get_shape(); + + IC = weightDims[1]; + groupIC = IC; + groupOC = weightDims[0]; + + biasesDims = { groupOC }; + + for (int i = 0; i < convolutionOp->get_strides().size(); i++) { + stride.push_back(static_cast(convolutionOp->get_strides()[i])); + } + for (int i = 0; i < convolutionOp->get_dilations().size(); i++) { + dilation.push_back(static_cast(convolutionOp->get_dilations()[i]) - 1); + } + paddingL = convolutionOp->get_pads_begin(); + paddingR = convolutionOp->get_pads_end(); + } else if (groupConvolutionOp) { + algorithm = ConvolutionGrouped; + + groupNum = groupConvolutionOp->input_value(1).get_shape()[0]; + isGrouped = true; + + weightDims = groupConvolutionOp->input_value(1).get_shape(); + + groupIC = weightDims[2]; + IC = groupIC * groupNum; + groupOC = weightDims[1]; + + biasesDims = {groupOC * groupNum}; + + for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) { + stride.push_back(static_cast(groupConvolutionOp->get_strides()[i])); + } + for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) { + dilation.push_back(static_cast(groupConvolutionOp->get_dilations()[i]) - 1); + } + paddingL = groupConvolutionOp->get_pads_begin(); + paddingR = groupConvolutionOp->get_pads_end(); } } -mkldnn::memory::data_type MKLDNNConvolutionNode::precisionToDataType(InferenceEngine::Precision prec) { - // MKLDNN Plugin doesn't support U16 layout so upcast to FP32 in this case - if (prec == Precision::U16) - prec = Precision::FP32; +bool MKLDNNConvolutionNode::canBeExecutedInInt8() const { + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); + if (!inputZeroPoints.empty()) + inputDataType = memory::data_type::u8; - return MKLDNNExtensionUtils::IEPrecisionToDataType(prec); + auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1)); + if (!weightsZeroPoints.empty()) + weightsDataType = memory::data_type::s8; + + return one_of(inputDataType, memory::data_type::u8, memory::data_type::s8) && weightsDataType == memory::data_type::s8; } -bool MKLDNNConvolutionNode::canBeExecutedInInt8() { - auto * convLayer = dynamic_cast(getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot convert convolution layer."; - - if (baseInputsNumber > 1) { - auto inputDataType = precisionToDataType(getCnnLayer()->insData[0].lock()->getPrecision()); - if (!inputZeroPoints.empty()) - inputDataType = memory::data_type::u8; - - auto weightsDataType = precisionToDataType(Precision::FP32); - if (baseInputsNumber > 1) { - weightsDataType = precisionToDataType(getCnnLayer()->insData[1].lock()->getPrecision()); - if (!weightsZeroPoints.empty()) - weightsDataType = memory::data_type::s8; - } - - return (inputDataType == mkldnn_s8 || inputDataType == mkldnn_u8) && weightsDataType == mkldnn_s8; - } else { - return this->getCnnLayer()->precision == Precision::I8; - } -} - -InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(MKLDNNEltwiseNode *eltwiseNode, int findex) { +InferenceEngine::Precision 
MKLDNNConvolutionNode::fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const { InferenceEngine::Precision eltwisePrecision; - auto parent0 = getCreatorLayer(eltwiseNode->getCnnLayer()->insData[0].lock()).lock(); - auto parent1 = getCreatorLayer(eltwiseNode->getCnnLayer()->insData[1].lock()).lock(); - auto fusedParent = findex != 0 ? fusedWith[findex - 1].get()->getCnnLayer() : this->getCnnLayer(); - eltwisePrecision = fusedParent == parent0 ? eltwiseNode->getCnnLayer()->insData[1].lock()->getPrecision() : - eltwiseNode->getCnnLayer()->insData[0].lock()->getPrecision(); + int fusingPort = fusingNode->getFusingPort(); + if (fusingPort == 0) { + eltwisePrecision = fusingNode->getOriginalInputPrecisionAtPort(1); + } else if (fusingPort == 1) { + eltwisePrecision = fusingNode->getOriginalInputPrecisionAtPort(0); + } else { + IE_THROW() << "Cannot determine Eltwise post op precision for Convolution node with name '" << getName() << "'"; + } + return eltwisePrecision; } @@ -104,47 +130,43 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { if (!descs.empty()) return; - auto * convLayer = dynamic_cast(getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot convert convolution layer."; + withBiases = getOriginalInputsNumber() == 3; withSum = false; - int expectedInputEdgesNum = baseInputsNumber; + int expectedInputEdgesNum = static_cast(getOriginalInputsNumber()); for (int i = 0; i < fusedWith.size(); i++) { - auto *convolutionNode = dynamic_cast(fusedWith[i].get()); - if (convolutionNode) { - expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1; + if (fusedWith[i]->getType() == Convolution) { + expectedInputEdgesNum += static_cast(fusedWith[i]->getOriginalInputsNumber()) - 1; } - auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); - if (eltwiseNode && eltwiseNode->isSum()) { - withSum = true; - expectedInputEdgesNum++; + if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { + auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); + if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { + withSum = true; + expectedInputEdgesNum++; + } } } - auto inputDataType = precisionToDataType(getCnnLayer()->insData[0].lock()->getPrecision()); + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; - auto outputDataType = precisionToDataType(getCnnLayer()->outData[0]->getPrecision()); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0)); eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); - if (baseInputsNumber > 1) { - if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputDataType = precisionToDataType(lastFusedLayer->outData[0]->getPrecision()); - eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); - } - } + if (!fusedWith.empty()) { + outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); + eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); + } - // We need to make sure that convolution output and second input of fused Eltwise operation - // have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32. 
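The comment above captures the key constraint for fusing an Eltwise sum into the convolution: the convolution output and the Eltwise's second input share one physical buffer, so their element sizes must match, and a mismatch forces both sides to FP32. Below is a minimal standalone sketch of that decision; the enum and function names are illustrative only and do not belong to the plugin.

#include <cstddef>
#include <iostream>

// Standalone sketch (not plugin code): pick the precision for an in-place
// fused sum when element sizes of the two tensors sharing the buffer differ.
enum class Prec { FP32, BF16, I8 };

static std::size_t byteSize(Prec p) {
    switch (p) {
        case Prec::FP32: return 4;
        case Prec::BF16: return 2;
        default:         return 1;
    }
}

// Equal element sizes keep the original output precision; otherwise both
// sides fall back to FP32 so the shared buffer can be reused safely.
static Prec resolveFusedSumPrecision(Prec convOutput, Prec eltwiseSecondInput) {
    return byteSize(convOutput) == byteSize(eltwiseSecondInput) ? convOutput : Prec::FP32;
}

int main() {
    // BF16 output (2 bytes) vs INT8 second input (1 byte) -> upscale to FP32.
    std::cout << (resolveFusedSumPrecision(Prec::BF16, Prec::I8) == Prec::FP32) << "\n";
    return 0;
}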
- if (outputDataType != memory::data_type::f32 && outputDataType != memory::data_type::bf16 && withSum) { - for (int i = 0; i < fusedWith.size(); i++) { - auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); - if (eltwiseNode && eltwiseNode->isSum()) { - eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i); + // We need to make sure that convolution output and second input of fused Eltwise operation + // have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32. + if (outputDataType != memory::data_type::f32 && outputDataType != memory::data_type::bf16 && withSum) { + for (int i = 0; i < fusedWith.size(); i++) { + if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { + auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); + if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { + eltwisePrecision = fusedEltwisePrecision(fusedWith[i]); if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) { eltwisePrecision = Precision::FP32; outputDataType = memory::data_type::f32; @@ -160,81 +182,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) { - IE_THROW() << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input."; - } - - isMerged = (!getMergeWith().empty()); // grouped convolution was constructed from split->concat subgraph - isGrouped = convLayer->_group != 1; // group info available from IR - if (isMerged && isGrouped) - IE_THROW() << "Convolution initialization. Group splitted mode are used together with direct group specification."; - - // default values. 
Can be replaced in next steps - groupNum = convLayer->_group; - size_t IC = convLayer->input()->getDims()[1]; - size_t groupIC = IC; - size_t groupOC = convLayer->_out_depth; - - isDW = groupNum == groupOC && groupNum == groupIC; - - if (isMerged) { - groupNum = getMergeWith().size() + 1; - } - if (isGrouped) { - groupIC /= groupNum; - groupOC /= groupNum; - } - - weightDims.clear(); - weightDims.push_back(groupOC); - weightDims.push_back(groupIC); - for (int i = 1; i <= convLayer->_kernel.size(); i++) { - weightDims.push_back(convLayer->_kernel[convLayer->_kernel.size() - i]); - } - biasesDims = { groupOC * groupNum }; - - if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum); - - withBiases = (convLayer->_biases != nullptr && convLayer->_biases->size() != 0) || baseInputsNumber == 3; - - if (baseInputsNumber == 1) { - internalBlobs.push_back(createInternalBlob(weightDims, true, isGrouped)); - - if (withBiases) { - internalBlobs.push_back(createInternalBlob(biasesDims, false)); - } - - Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second; - if (weights->getTensorDesc().getPrecision() == Precision::I8) { - // The weights blob has incorrect dims, so we have to fix it - TensorDesc wdesc = internalBlobs[0]->getTensorDesc(); - wdesc.setPrecision(Precision::I8); - InferenceEngine::TBlob::Ptr reshapedInt8Weights = - InferenceEngine::TBlob::Ptr( - new InferenceEngine::TBlob(wdesc, static_cast(weights->buffer()), weights->byteSize())); - - internalBlobs[0] = reshapedInt8Weights; - if (withBiases) { - Blob::Ptr biases = this->getCnnLayer()->blobs.find("biases")->second; - TensorDesc bdesc = internalBlobs[1]->getTensorDesc(); - bdesc.setPrecision(Precision::I32); - InferenceEngine::TBlob::Ptr reshapedInt32Biases = - InferenceEngine::TBlob::Ptr( - new InferenceEngine::TBlob(bdesc, static_cast(biases->buffer()), biases->byteSize())); - internalBlobs[1] = reshapedInt32Biases; - } - } - } - - invertVectorCopyUtoI(convLayer->_stride, stride); - for (int i = 1; i <= convLayer->_dilation.size(); i++) { - dilation.push_back(static_cast(convLayer->_dilation[convLayer->_dilation.size() - i]) - 1); - } - - auto allPads = getPaddings(*convLayer); - invertVectorCopyUtoI(allPads.begin, paddingL); - invertVectorCopyUtoI(allPads.end, paddingR); - + int ndims = getParentEdgesAtPort(0)[0]->getDims().ndims(); MKLDNNDims weightsDims = MKLDNNDims(weightDims); withDWConv = isFusedWith(Convolution); @@ -242,29 +190,26 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { for (int i = 0; i < fusedWith.size(); i++) { auto *convolutionNode = dynamic_cast(fusedWith[i].get()); if (convolutionNode) { - auto *convLayer = reinterpret_cast(convolutionNode->getCnnLayer().get()); dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2]; dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1]; - dw_conv_oc = convLayer->_out_depth; - for (int j = 0; j < convLayer->_kernel.size(); j++) { - dw_conv_kernel.push_back(convLayer->_kernel[j]); - } - for (int j = 0; j < convLayer->_stride.size(); j++) { - dw_conv_strides.push_back(convLayer->_stride[j]); - } + dw_conv_oc = convolutionNode->outDims[0][1]; + const auto &dwWeightsDims = convolutionNode->inDims[1].ToSizeVector(); + dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 1]); + dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 2]); + dw_conv_strides = convolutionNode->getStride(); if (canBeExecutedInInt8()) { if (i == 0) { - dw_conv_in_dt = 
precisionToDataType(getCnnLayer()->outData[0]->getPrecision()); + dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0)); } else { - dw_conv_in_dt = precisionToDataType(fusedWith[i - 1].get()->getCnnLayer()->outData[0]->getPrecision()); + dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[i - 1]->getOriginalOutputPrecisionAtPort(0)); } } else { dw_conv_in_dt = memory::data_type::f32; } for (int j = 0; j < paddingR.size(); j++) { - int with_group = (isGrouped || isMerged) ? 1 : 0; + int with_group = isGrouped ? 1 : 0; int krn = weightsDims[with_group + 2 + j]; int src = getParentEdgeAt(0)->getDims()[2 + j]; int dst = getChildEdgeAt(0)->getDims()[2 + j]; @@ -283,30 +228,32 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { outputDataType = memory::data_type::f32; if (eltwisePrecision == Precision::BF16) eltwisePrecision = Precision::FP32; - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, - getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ndims == 5 ? memory::format_tag::ndhwc + : memory::format_tag::nhwc); + out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ndims == 5 ? memory::format_tag::ndhwc + : memory::format_tag::nhwc); createDescriptor({in_candidate}, {out_candidate}); } else { - inputDataType = (convLayer->input()->getPrecision() == Precision::BF16 - && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::data_type::bf16 : memory::data_type::f32; - outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16 - && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::data_type::bf16 : memory::data_type::f32; + inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16 + : memory::data_type::f32; + outputDataType = (getOriginalOutputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16 + : memory::data_type::f32; eltwisePrecision = Precision::FP32; for (int i = 0; i < fusedWith.size(); i++) { - auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); - if (eltwiseNode && eltwiseNode->isSum()) { - eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i); - // TODO(amalyshe): there might be situation when convolution can be executed in BF16, - // output is required in FP32 but eltwise inplace tensor would be in BF16 - // currently we forcedly change output to the BF16 that will add reoreder after the node - // Another situation can be when we mark output as FP32 and Eltwise asPrecison (which stand - // for input of inplace tensor precision) to FP32. This will add reorder for that in-place tensor - // bofore the fused convolution. This behaviour might be more correct regarding expected markup - // of the graph but performance of first and second approaches might be different. Need to verify - outputDataType = eltwisePrecision == Precision::BF16 ? 
memory::data_type::bf16 : memory::data_type::f32; - eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); + if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { + auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); + if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { + eltwisePrecision = fusedEltwisePrecision(fusedWith[i]); + // TODO(amalyshe): there might be situation when convolution can be executed in BF16, + // output is required in FP32 but eltwise inplace tensor would be in BF16 + // currently we forcedly change output to the BF16 that will add reoreder after the node + // Another situation can be when we mark output as FP32 and Eltwise asPrecison (which stand + // for input of inplace tensor precision) to FP32. This will add reorder for that in-place tensor + // bofore the fused convolution. This behaviour might be more correct regarding expected markup + // of the graph but performance of first and second approaches might be different. Need to verify + outputDataType = eltwisePrecision == Precision::BF16 ? memory::data_type::bf16 : memory::data_type::f32; + eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); + } } } // correction for cases of FP32 input - we do not have FP32 convolution supported BF16 output @@ -316,16 +263,13 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { eltwisePrecision = Precision::FP32; } - Layout layout = convLayer->input()->getLayout(); - - if (layout == NCHW || layout == NHWC) { + if (ndims == 4) { if (IC == 1 && groupOC == 1) { in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); createDescriptor({in_candidate}, {out_candidate}); } else if (IC == 3 || IC == 1) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - layout == NCHW ? memory::format_tag::nchw : memory::format_tag::nhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw16c); createDescriptor({in_candidate}, {out_candidate}); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw8c); @@ -339,19 +283,16 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { createDescriptor({in_candidate}, {out_candidate}); } - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - layout == NCHW ? memory::format_tag::nchw : memory::format_tag::nhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, - layout == NCHW ? 
memory::format_tag::nchw : memory::format_tag::nhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); + out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); createDescriptor({in_candidate}, {out_candidate}); - } else if (layout == NCDHW || layout == NDHWC) { + } else if (ndims == 5) { if (IC == 1 && groupOC == 1) { in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); createDescriptor({in_candidate}, {out_candidate}); } else if (IC == 3 || IC == 1) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - layout == NCDHW ? memory::format_tag::ncdhw : memory::format_tag::ndhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw16c); createDescriptor({in_candidate}, {out_candidate}); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw8c); @@ -365,17 +306,14 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { createDescriptor({in_candidate}, {out_candidate}); } - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - layout == NCDHW ? memory::format_tag::ncdhw : memory::format_tag::ndhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, - layout == NCDHW ? memory::format_tag::ncdhw : memory::format_tag::ndhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); + out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); createDescriptor({in_candidate}, {out_candidate}); } } } -void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) { - int blob_idx = 0; +void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) const { mkldnn::post_ops ops; for (auto &node : fusedWith) { @@ -383,66 +321,31 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe continue; auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode && eltwiseNode->isSum()) { - ops.append_sum(1.0, precisionToDataType(eltwisePrecision)); - continue; - } - if (eltwiseNode) { - eltwiseNode->appendPostOps(ops); + if (eltwiseNode->isSpecialConvolutionAddFusing()) + ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision)); + else + eltwiseNode->appendPostOps(ops); continue; } - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode) { - quantizeNode->appendPostOps(ops); + auto* fakeQuantizeNode = dynamic_cast(node.get()); + if (fakeQuantizeNode) { + fakeQuantizeNode->appendPostOps(ops); continue; } auto* convolutionNode = dynamic_cast(node.get()); if (convolutionNode) { if (initWeights) { - if (convolutionNode->getBaseIntputsNumber() == 1) { - auto* convLayer = reinterpret_cast(convolutionNode->getCnnLayer().get()); - - auto weightsPrc = precisionToDataType(convLayer->precision); - auto biasPrc = memory::data_type::s32; - - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - 
PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); - PostOpsIntBlobMemory[blob_idx]->FillZero(); - - Blob::Ptr weights = convLayer->blobs.find("weights")->second; - Blob::Ptr biases = convLayer->blobs.find("biases")->second; - - PostOpsIntBlobMemory[blob_idx]->SetData(weightsPrc, memory::format_tag::goihw, weights->buffer(), - dwWeightsDims.size() * MKLDNNExtensionUtils::sizeOfDataType(weightsPrc)); - - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - MKLDNNDims dwBiasesDims({dw_conv_oc}); - PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, biasPrc, memory::format_tag::x); - PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); - PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::format_tag::x, biases->buffer(), - dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc)); - // todo: rewrite onto append_dw_k3s2p1 - ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS], - dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS], - mkldnn::memory::convert_to_c(dw_conv_in_dt), - static_cast(PostOpsIntBlobMemory[blob_idx]->GetData()), - static_cast(PostOpsIntBlobMemory[blob_idx + 1]->GetData())); - - blob_idx += 2; - } else { - // todo: rewrite onto append_dw_k3s2p1 - ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS], - dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS], - mkldnn::memory::convert_to_c(dw_conv_in_dt), - static_cast(getParentEdgeAt( - baseInputsNumber + 0)->getMemory().GetData()), - static_cast(getParentEdgeAt( - baseInputsNumber + 1)->getMemory().GetData())); - } + // todo: rewrite onto append_dw_k3s2p1 + ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS], + dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS], + mkldnn::memory::convert_to_c(dw_conv_in_dt), + static_cast(getParentEdgeAt( + getOriginalInputsNumber() + 0)->getMemory().GetData()), + static_cast(getParentEdgeAt( + getOriginalInputsNumber() + 1)->getMemory().GetData())); } else { // todo: rewrite onto append_dw_k3s2p1 ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS], @@ -451,47 +354,6 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe nullptr, nullptr); } - - if (convolutionNode->wScale != nullptr) { - float* wScaleData = static_cast(convolutionNode->wScale->buffer()); - - std::vector oScaleDataVector; - std::vector oShiftDataVector; - if (convolutionNode->getCnnLayer()->precision == Precision::I8 && - convolutionNode->getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) { - float *oScaleData = static_cast(convolutionNode->oScale->buffer()); - - for (size_t c = 0; c < convolutionNode->wScale->size(); c++) { - oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]); - oShiftDataVector.push_back(0.f); - } - } else { - for (size_t c = 0; c < convolutionNode->wScale->size(); c++) { - oScaleDataVector.push_back(wScaleData[c]); - oShiftDataVector.push_back(0.f); - } - } - - MKLDNNDims oScaleDims({static_cast(rnd_up(biasesDims[0], 16))}); - - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - PostOpsIntBlobMemory[blob_idx]->Create(oScaleDims, memory::data_type::f32, memory::format_tag::x); - PostOpsIntBlobMemory[blob_idx]->FillZero(); - PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::format_tag::x, &oScaleDataVector[0], - oScaleDataVector.size() * 
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); - - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - PostOpsIntBlobMemory[blob_idx + 1]->Create(oScaleDims, memory::data_type::f32, memory::format_tag::x); - PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); - PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::format_tag::x, &oShiftDataVector[0], - oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); - - ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, - static_cast(PostOpsIntBlobMemory[blob_idx]->GetData()), - static_cast(PostOpsIntBlobMemory[blob_idx + 1]->GetData())); - - blob_idx += 2; - } continue; } @@ -528,8 +390,8 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { config.inConfs.push_back(dataConfig); } - if (withDWConv && baseInputsNumber > 1) { - auto weightsPrc = precisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); + if (withDWConv) { + auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); @@ -553,7 +415,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, i); - if (!(isGrouped || isMerged)) + if (!isGrouped) dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc); config.outConfs.push_back(dataConfig); @@ -582,7 +444,6 @@ void MKLDNNConvolutionNode::createPrimitive() { mkldnn::primitive_attr attr; addZeroPoints(attr); setPostOps(attr, true); - addScaleToPrimitiveAttr(attr); auto prim_desc = createPrimitiveDescriptor(attr); @@ -590,11 +451,14 @@ void MKLDNNConvolutionNode::createPrimitive() { prim.reset(new convolution_forward(prim_desc)); auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + auto wei = getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPrimitive(); auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - if (withBiases) - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_BIAS, getBias()}, {DNNL_ARG_DST, dst}}; - else - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_DST, dst}}; + if (withBiases) { + auto bias = getParentEdgesAtPort(2)[0]->getMemoryPtr()->GetPrimitive(); + primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_BIAS, bias}, {DNNL_ARG_DST, dst}}; + } else { + primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_DST, dst}}; + } } bool MKLDNNConvolutionNode::created() const { @@ -605,55 +469,25 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector &outputDesc) { TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; - mkldnn::memory::data_type wdt = precisionToDataType(inDesc.getPrecision()); - mkldnn::memory::data_type bdt = precisionToDataType(inDesc.getPrecision()); - if (inDesc.getPrecision() == Precision::BF16) { - bdt = mkldnn::memory::data_type::f32; - } + memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); + memory::data_type bdt = memory::data_type::f32; if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { wdt = memory::data_type::s8; - bdt = baseInputsNumber == 3 ? 
precisionToDataType(getCnnLayer()->insData[2].lock()->getPrecision()) : memory::data_type::s32; - } - - if (baseInputsNumber == 1) { - Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second; - - if (weights->getTensorDesc().getPrecision() == Precision::I8) { - wdt = memory::data_type::s8; - bdt = memory::data_type::s32; - - Precision outPrec; - if (getCnnLayer()->outData[0]->getPrecision() == Precision::FP32) { - outPrec = Precision::FP32; - } else { - // define precision accordninly normalizer - // TODO(amalyshe) do we need to have separate flow for last in int8 chain or not? - outPrec = outDesc.getPrecision(); - } - - inDesc = TensorDesc(inDesc.getPrecision(), inputDesc[0].getDims(), inputDesc[0].getBlockingDesc()); - outDesc = TensorDesc(outPrec, outputDesc[0].getDims(), outputDesc[0].getBlockingDesc()); - } } MKLDNNMemoryDesc in_candidate(inDesc); MKLDNNMemoryDesc out_candidate(outDesc); - // grouping and autoblocking is not compatible - if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) - return; - MKLDNNDims blocked_weightDims(weightDims); MKLDNNDims blocked_biasesDims(biasesDims); MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::format_tag::any}; - std::vector algorithms; - // We cannot map wino_format on tensor descriptor for now - if (getBaseIntputsNumber() == 1) { - algorithms.push_back(algorithm::convolution_winograd); - } - algorithms.push_back(algorithm::convolution_direct); + std::vector algorithms; + + // TODO [NM]: We cannot map wino_format on tensor descriptor for now + // algorithms.push_back(algorithm::convolution_winograd); + algorithms.push_back(mkldnn::algorithm::convolution_direct); for (auto alg : algorithms) { try { @@ -695,27 +529,6 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) const { } } -void MKLDNNConvolutionNode::addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const { - if (wScale != nullptr) { - float* wScaleData = static_cast(wScale->buffer()); - - std::vector oScaleDataVector; - if (getCnnLayer()->precision == Precision::I8 && getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) { - float *oScaleData = static_cast(oScale->buffer()); - - for (size_t c = 0; c < wScale->size(); c++) { - oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]); - } - } else { - for (size_t c = 0; c < wScale->size(); c++) { - oScaleDataVector.push_back(wScaleData[c]); - } - } - - attr.set_output_scales(1 << 1 /*through C dim*/, oScaleDataVector); - } -} - void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { @@ -725,18 +538,15 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c // Strided blobs feature support. // Works only for FP32 convolutions for now. bool isStridedBlobsSupported = true; - for (auto &insData : getCnnLayer()->insData) { - if (insData.lock()->getPrecision() != InferenceEngine::Precision::FP32 - && insData.lock()->getPrecision() != InferenceEngine::Precision::BF16) { - isStridedBlobsSupported = false; - break; - } - } - // TODO: fix strided blobs feature support for dynamic weights - if (baseInputsNumber != 1) { + // TODO [NM]: refactor via using global executionPrecision. 
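In the initDescriptor() change here, the old loop over getCnnLayer()->insData precisions is dropped in favour of a single canBeExecutedInInt8() query when deciding whether the strided-blobs path is allowed. The helper's body is not shown in this hunk; the following is a hypothetical, minimal sketch of the kind of precision check it presumably stands for, assuming the usual convention of activations at port 0 and weights at port 1 (both the port layout and the exact condition are assumptions, not part of this patch):

// Hypothetical sketch only -- the real canBeExecutedInInt8() is defined elsewhere in this patch set.
// It replaces the removed per-input precision loop: the node is treated as an int8 convolution
// when the activations are quantized (u8/i8) and the weights are signed 8-bit.
bool MKLDNNConvolutionNode::canBeExecutedInInt8() const {
    auto srcPrec = getOriginalInputPrecisionAtPort(0);   // activations (assumed port 0)
    auto weiPrec = getOriginalInputPrecisionAtPort(1);   // weights (assumed port 1)
    return (srcPrec == InferenceEngine::Precision::U8 || srcPrec == InferenceEngine::Precision::I8) &&
           weiPrec == InferenceEngine::Precision::I8;
}

Whatever the exact condition inside the helper, its effect in the code below is the same as the removed loop: int8 convolutions opt out of the strided-blobs feature.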
+ if (canBeExecutedInInt8()) { isStridedBlobsSupported = false; } + // TODO [NM]: fix strided blobs feature support for dynamic weights + // if (getOriginalInputsNumber() != 1) { + // isStridedBlobsSupported = false; + // } if (isStridedBlobsSupported) { createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc}); @@ -745,7 +555,6 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c mkldnn::primitive_attr attr; addZeroPoints(attr); setPostOps(attr); - addScaleToPrimitiveAttr(attr); InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); size_t selected_count = 0; @@ -768,8 +577,8 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c cfg.inConfs.push_back(dataConfig); } - if (withDWConv && baseInputsNumber > 1) { - auto weightsPrc = precisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); + if (withDWConv) { + auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); @@ -853,14 +662,14 @@ void MKLDNNConvolutionNode::filterSupportedDescriptors() { } } -bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) { +bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const { // WA: In some cases, we can predict in advance the type of primitive that will be called in the future. // In particular, isPossibleToSkipInitConfig() checks whether we can skip the creation of primitives with // gemm implementation, which significantly increase the network load time. if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) return false; - if (getCnnLayer()->params.find("PrimitivesPriority") != getCnnLayer()->params.end()) + if (isPrimitivesPriorityDefined) return false; // Here we check that we will not delete jit_planar_conv primitive by mistake. @@ -920,12 +729,8 @@ MKLDNNMemoryDesc MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_ite } } -const mkldnn::memory& MKLDNNConvolutionNode::getWeights() const { - return baseInputsNumber > 1 ? getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive(); -} - -const mkldnn::memory& MKLDNNConvolutionNode::getBias() const { - return baseInputsNumber > 2 ? 
getParentEdgeAt(2)->getMemory().GetPrimitive() : internalBlobMemory[1]->GetPrimitive(); +bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { + return canFuseSimpleOperation(node); } InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 4c11b331f75..10c4755230a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -16,9 +16,10 @@ class MKLDNNEltwiseNode; class MKLDNNConvolutionNode : public MKLDNNNode { public: - MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNConvolutionNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) override; @@ -26,50 +27,45 @@ public: void createPrimitive() override; void initSupportedPrimitiveDescriptors() override; void filterSupportedPrimitiveDescriptors() override; - void filterSupportedDescriptors(); - bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc); bool created() const override; bool canBeInPlace() const override { return false; } - - void setPostOps(mkldnn::primitive_attr &attr, bool initWeights); - - size_t descInputNumbers(MKLDNNDescriptor desc) override { - return static_cast(baseInputsNumber); - } - - int getBaseIntputsNumber() { - return baseInputsNumber; - } - - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - - const mkldnn::memory& getWeights() const; - const mkldnn::memory& getBias() const; - - bool canBeExecutedInInt8(); - InferenceEngine::Precision getRuntimePrecision() const override; + MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + size_t descInputNumbers(MKLDNNDescriptor desc) override { + return static_cast(getOriginalInputsNumber()); + } + + bool canBeExecutedInInt8() const; + size_t getGroupNum() const { return groupNum; } std::vector inputZeroPoints; std::vector weightsZeroPoints; std::vector outputCompensation; + const InferenceEngine::SizeVector &getWeightDims() { return weightDims; } + const std::vector &getStride() { return stride; } + const std::vector &getDilation() { return dilation; } + const std::vector &getPaddingL() { return paddingL; } + const std::vector &getPaddingR() { return paddingR; } + + bool canFuse(const MKLDNNNodePtr& node) const override; + protected: - void addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const; - InferenceEngine::Precision fusedEltwisePrecision(MKLDNNEltwiseNode *eltwiseNode, int findex); + InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const; private: - mkldnn::memory::data_type precisionToDataType(InferenceEngine::Precision prec); void addZeroPoints(mkldnn::primitive_attr& attr) const; + void setPostOps(mkldnn::primitive_attr &attr, bool initWeights) const ; + void filterSupportedDescriptors(); + bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const; bool withBiases; bool withSum; bool withDWConv; - bool isDW; - bool isMerged; bool isGrouped; + bool 
isPrimitivesPriorityDefined; std::vector stride; std::vector dilation; std::vector paddingL; @@ -83,14 +79,16 @@ private: std::vector dw_conv_kernel; std::vector dw_conv_strides; mkldnn::memory::data_type dw_conv_in_dt; - std::vector PostOpsIntBlobMemory; - - InferenceEngine::Blob::Ptr wScale, oScale; size_t groupNum; - int baseInputsNumber; + size_t IC; + size_t groupIC; + size_t groupOC; InferenceEngine::Precision eltwisePrecision; + + const size_t X_AXIS = 0; + const size_t Y_AXIS = 1; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 6f67d116ac1..d226dd73890 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -6,15 +6,43 @@ #include "mkldnn_convert_node.h" #include "common/cpu_convert.h" #include "common/tensor_desc_creator.h" - -#define THROW_ERROR IE_THROW() << getTypeStr() << " layer with name '" << getName() <<"' ERROR: " +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto convert = std::dynamic_pointer_cast(op); + if (!convert) { + errorMessage = "Only opset1 Convert operation is supported"; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Convert node with name '" + getName() + "'"; + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} + +MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, + const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode("Convert", nodeName, eng, cache) { + inDims.emplace_back(dims); + addOriginalInputPrecision(inPrc); + outDims.emplace_back(dims); + addOriginalOutputPrecision(outPrc); +} void MKLDNNConvertNode::getSupportedDescriptors() { // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data @@ -24,20 +52,15 @@ void MKLDNNConvertNode::getSupportedDescriptors() { if (inDims.empty() && input && input->getLayout() != InferenceEngine::Layout::ANY) inDims.push_back(MKLDNNDims(input->getDims())); if (getParentEdges().size() != 1) - THROW_ERROR << "Incorrect number of input edges"; + IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) - THROW_ERROR << "Incorrect number of output edges"; + IE_THROW() << errorPrefix << " has incorrect number of output edges"; } void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto layer = getCnnLayer(); - if (layer == nullptr) { - THROW_ERROR << "Cannot get CNN layer"; - } - LayerConfig config; DataConfig dataIn; DataConfig dataConfigOut; @@ -54,16 +77,11 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { dataConfigOut.desc = 
TensorDesc(output->getPrecision(), input->getDims(), blockingDesc); config.outConfs.push_back(dataConfigOut); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); - } else if (layer->insData.size() == 1 && layer->outData.size() == 1) { - auto insData = layer->insData[0].lock(); - if (nullptr == insData) { - THROW_ERROR << "Input data is empty"; - } - - const SizeVector& insDims = insData->getTensorDesc().getDims(); - auto insPrecision = insData->getTensorDesc().getPrecision(); - const SizeVector& outputDims = layer->outData[0]->getTensorDesc().getDims(); - auto outPrecision = layer->outData[0]->getTensorDesc().getPrecision(); + } else if (getOriginalInputsNumber() == 1 && getOriginalOutputsNumber() == 1) { + const SizeVector& insDims = getParentEdgeAt(0)->getDims().ToSizeVector(); + auto insPrecision = getOriginalInputPrecisionAtPort(0); + const SizeVector& outputDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + auto outPrecision = getOriginalOutputPrecisionAtPort(0); config.inConfs.push_back(dataIn); config.outConfs.push_back(dataConfigOut); @@ -78,7 +96,7 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); } } else { - THROW_ERROR << "Incorrect number of input/output edges"; + IE_THROW() << errorPrefix << " has incorrect number of input/output edges"; } } @@ -86,18 +104,18 @@ void MKLDNNConvertNode::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) - THROW_ERROR << "Destination memory didn't allocate."; + IE_THROW() << errorPrefix << " has not allocated destination memory"; if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) - THROW_ERROR << "Input memory didn't allocate."; + IE_THROW() << errorPrefix << " has not allocated input memory"; if (getSelectedPrimitiveDescriptor() == nullptr) - THROW_ERROR << "Preferable primitive descriptor is not set."; + IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor"; } void MKLDNNConvertNode::execute(mkldnn::stream strm) { auto& parentMem = getParentEdgeAt(0)->getMemory(); auto& childMem = getChildEdgeAt(0)->getMemory(); if (parentMem.GetElementsCount() != childMem.GetElementsCount()) - THROW_ERROR << "Input and output buffers have different elements count"; + IE_THROW() << errorPrefix << " has different elements number in input and output buffers"; void* srcPtr = parentMem.GetPtr(); void* dstPtr = childMem.GetPtr(); @@ -107,4 +125,5 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) { bool MKLDNNConvertNode::created() const { return getType() == Convert; } + REG_MKLDNN_PRIM_FOR(MKLDNNConvertNode, Convert); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index 51313546b3f..6aacf7f1e5a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -13,7 +13,9 @@ namespace MKLDNNPlugin { class MKLDNNConvertNode : public MKLDNNNode { public: - MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNConvertNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNConvertNode(const 
InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, + const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNConvertNode() override = default; void getSupportedDescriptors() override; @@ -37,9 +39,13 @@ public: std::shared_ptr getInput() const { return input; } std::shared_ptr getOutput() const { return output; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: std::shared_ptr input; std::shared_ptr output; + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 2fbfedf75b8..b18763f0e5e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -3,115 +3,110 @@ // #include "mkldnn_deconv_node.h" -#include +#include "mkldnn_eltwise_node.h" #include #include #include #include #include -#include #include "ie_parallel.hpp" #include "utils/general_utils.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const InferenceEngine::CNNLayerPtr& layer, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(layer, eng, cache) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); - }); +bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (std::dynamic_pointer_cast(op) == nullptr && + std::dynamic_pointer_cast(op) == nullptr) { + errorMessage = "Only opset1 ConvolutionBackpropData and GroupConvolutionBackpropData operations are supported"; + return false; + } + size_t ndims = op->get_input_shape(0).size(); + if ((ndims < 3) || (ndims > 5)) { + errorMessage = "Only 3D, 4D and 5D blobs are supported as input"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr& op, + const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Deconvolution node with name '" + getName() + "'"; + + auto convBackprop = std::dynamic_pointer_cast(op); + auto groupConvBackprop = std::dynamic_pointer_cast(op); + const auto dataShape = op->get_input_shape(0); + weightDims = op->get_input_shape(1); + const auto outShape = op->get_shape(); + OC = outShape[1]; + IC = dataShape[1]; + + if (convBackprop) { + algorithm = DeconvolutionCommon; + + groupNum = 1; + withGroups = false; + + for (int i = 0; i < convBackprop->get_strides().size(); i++) { + stride.push_back(static_cast(convBackprop->get_strides()[i])); + } + for (int i = 0; i < convBackprop->get_dilations().size(); i++) { + dilation.push_back(static_cast(convBackprop->get_dilations()[i]) - 1); + } + paddingL = convBackprop->get_pads_begin(); + paddingR = convBackprop->get_pads_end(); + } else if (groupConvBackprop) { + algorithm = DeconvolutionGrouped; + + groupNum = weightDims[0]; + withGroups = groupNum > 1; + isDW = withGroups && groupNum == OC && groupNum == IC; + + for (int i = 0; i < groupConvBackprop->get_strides().size(); i++) { + stride.push_back(static_cast(groupConvBackprop->get_strides()[i])); + } + for (int i = 0; i < groupConvBackprop->get_dilations().size(); i++) { + dilation.push_back(static_cast(groupConvBackprop->get_dilations()[i]) - 1); + } + paddingL = groupConvBackprop->get_pads_begin(); + paddingR = groupConvBackprop->get_pads_end(); + } + } else { + IE_THROW(NotImplemented) << errorMessage; + } } void MKLDNNDeconvolutionNode::getSupportedDescriptors() { if (!descs_fwd.empty() && !descs_bwd.empty()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); + if (!one_of(precision, InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16)) precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - precision = getCnnLayer()->outData[0]->getPrecision(); - if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) + precision = getOriginalOutputPrecisionAtPort(0); + if (!one_of(precision, InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16)) precision = InferenceEngine::Precision::FP32; auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16) inputDataType = outputDataType = memory::data_type::bf16; - if (getParentEdges().empty() || getParentEdges().size() > 3) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); + if (getParentEdges().size() != 2 && getParentEdges().size() != 3) + IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - auto * deconvLayer = dynamic_cast(getCnnLayer().get()); - if (deconvLayer == nullptr) - IE_THROW() << "Cannot convert deconvolution layer."; - if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) { - IE_THROW() << 
"Weights are empty for layer: " << deconvLayer->name - << " used in MKLDNN node: " << getName() << "\n" - << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" - << " to load them from .bin part of the IR"; - } - withGroups = (deconvLayer->_group > 1); - isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth && - deconvLayer->_group == deconvLayer->input()->getDims()[1]; - - bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3; - if (withBiases) { - Blob::Ptr biases; - - if (getParentEdges().size() == 3) { - auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer(); - if (biasLayer->type != "Const") - IE_THROW() << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases"; - biases = biasLayer->blobs["custom"]; - } else { - biases = deconvLayer->_biases; - } - - // WA: we add bias as depthwise post op - setBiasAsPostOp(biases); - } - - /* Original layout format for deconv weights is iohw (from Caffe). - * We specify oihw, but mean iohw, because there are no more - * suitable format in MKLDNN. - */ - SizeVector weightDims; - if (withGroups) { - weightDims = { - deconvLayer->_group, - deconvLayer->input()->getTensorDesc().getDims()[1] / deconvLayer->_group, - deconvLayer->_out_depth / deconvLayer->_group, - }; - groupNum = deconvLayer->_group; - } else { - weightDims = { - deconvLayer->input()->getTensorDesc().getDims()[1], - deconvLayer->_out_depth - }; - } - for (int i = 1; i <= deconvLayer->_kernel.size(); i++) { - weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]); - } - - if (getParentEdges().size() == 1) - internalBlobs.push_back(createInternalBlob(weightDims, true)); - - invertVectorCopyUtoI(deconvLayer->_stride, stride); - for (int i = 1; i <= deconvLayer->_dilation.size(); i++) { - dilation.push_back(static_cast(deconvLayer->_dilation[deconvLayer->_dilation.size() - i]) - 1); - } - auto allPads = getPaddings(*deconvLayer); - invertVectorCopyUtoI(allPads.begin, paddingL); - invertVectorCopyUtoI(allPads.end, paddingR); - - weightsDims = MKLDNNDims(weightDims); + IE_THROW() << errorPrefix << " has incorrect number of output edges"; for (int i = 0; i < paddingR.size(); i++) { - int with_group = (withGroups) ? 1 : 0; - int krn = weightsDims[with_group + 2 + i]; + int with_group = getAlgorithm() == DeconvolutionGrouped ? 
1 : 0; + int krn = weightDims[with_group + 2 + i]; int src = getChildEdgeAt(0)->getDims()[2 + i]; int dst = getParentEdgeAt(0)->getDims()[2 + i]; @@ -125,32 +120,20 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); createDescriptor({in_candidate}, {out_candidate}); } + setPostOps(attr); } -void MKLDNNDeconvolutionNode::setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases) { +void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { mkldnn::post_ops ops; - auto depthwiseSize = static_cast(rnd_up(biases->size(), 16)); - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - PostOpsIntBlobMemory[0]->Create({depthwiseSize}, memory::data_type::f32, memory::format_tag::x); - PostOpsIntBlobMemory[0]->FillZero(); - std::vector weights(depthwiseSize, 1.0f); - std::fill(weights.begin() + biases->size(), weights.end(), 0.0f); - PostOpsIntBlobMemory[0]->SetData(memory::data_type::f32, memory::format_tag::x, weights.data(), - weights.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); - - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - PostOpsIntBlobMemory[1]->Create({depthwiseSize}, memory::data_type::f32, memory::format_tag::x); - PostOpsIntBlobMemory[1]->FillZero(); - auto biases_ptr = biases->buffer().as(); - std::vector bias(depthwiseSize, 0.0f); - std::copy(biases_ptr, biases_ptr + biases->size(), bias.begin()); - PostOpsIntBlobMemory[1]->SetData(memory::data_type::f32, memory::format_tag::x, bias.data(), - bias.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); - - ops.append_depthwise(algorithm::depthwise_scale_shift, - (const float *) PostOpsIntBlobMemory[0]->GetData(), - (const float *) PostOpsIntBlobMemory[1]->GetData()); + for (auto &node : fusedWith) { + auto* eltwiseNode = dynamic_cast(node.get()); + if (eltwiseNode) { + eltwiseNode->appendPostOps(ops); + continue; + } + IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented"; + } attr.set_post_ops(ops); } @@ -199,8 +182,9 @@ void MKLDNNDeconvolutionNode::createPrimitive() { prim.reset(new convolution_backward_data(prim_desc)); auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive(); auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_DIFF_SRC, dst}}; + primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}}; } void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, @@ -212,8 +196,9 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector& orig_dims) { return memory::dims(orig_dims.begin(), orig_dims.end()); }; @@ -228,11 +213,11 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector deconv_desc; deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate, wgh_candidate, - in_candidate, - convert(stride), - convert(dilation), - convert(paddingL), - convert(paddingR))); + in_candidate, + convert(stride), + convert(dilation), + convert(paddingL), + convert(paddingR))); descs_fwd.push_back(conv_desc); descs_bwd.push_back(deconv_desc); @@ -245,6 +230,12 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vectorgetDims().ToSizeVector(), + 
TensorDesc::getLayoutByDims(getParentEdgeAt(2)->getDims().ToSizeVector()))); + } + InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_desc(idx)); @@ -256,7 +247,7 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_i if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(), desc.getBlockingDesc().getOrder().end()) + 1) { auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector(); - auto new_dims = weightsDims.ToSizeVector(); + auto new_dims = weightDims; auto td = InferenceEngine::TensorDesc(desc.getPrecision(), new_dims, @@ -285,10 +276,6 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_i desc.getBlockingDesc())); } -const mkldnn::memory& MKLDNNDeconvolutionNode::getWeights() const { - return getParentEdges().size() > 1 ? getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive(); -} - InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const { std::vector inputPrecisions; // Don't take bias precision into account diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index 8caf8373579..f5715ea6368 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNDeconvolutionNode : public MKLDNNNode { public: - MKLDNNDeconvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNDeconvolutionNode() override = default; void getSupportedDescriptors() override; @@ -37,23 +37,27 @@ public: InferenceEngine::Precision getRuntimePrecision() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: bool withGroups = false; bool isDW = false; size_t groupNum = 1; + size_t outDepth; + size_t IC; + size_t OC; std::vector stride; - std::vector paddingL; std::vector dilation; + std::vector paddingL; std::vector paddingR; - MKLDNNDims weightsDims; + InferenceEngine::SizeVector weightDims; std::vector> descs_fwd; std::vector> descs_bwd; mkldnn::primitive_attr attr; - std::vector PostOpsIntBlobMemory; - void setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases); + void setPostOps(mkldnn::primitive_attr &attr); - const mkldnn::memory& getWeights() const; + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp index 8b2ea38864e..dde4d960c58 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp @@ -7,13 +7,11 @@ #include "mkldnn_input_node.h" #include "mkldnn_eltwise_node.h" -#include #include #include #include #include #include -#include #include #include "ie_parallel.hpp" @@ -741,18 +739,45 @@ private: } }; -MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, +bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) 
noexcept { + try { + auto defConvNode = ngraph::as_type_ptr(op); + if (!defConvNode) { + errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + auto defConvNode = ngraph::as_type_ptr(op); + + group = defConvNode->get_group(); + deformable_group = defConvNode->get_deformable_group(); + + auto& strides = defConvNode->get_strides(); + for (int i = 0; i < strides.size(); i++) { + stride.push_back(strides[i]); + } + + auto& dilations = defConvNode->get_dilations(); + for (int i = 1; i <= dilations.size(); i++) { + dilation.push_back(dilations[dilations.size() - i] - 1); + } + + paddingL = defConvNode->get_pads_begin(); +} void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - - auto * defConvLayer = dynamic_cast(getCnnLayer().get()); - if (defConvLayer == nullptr) - IE_THROW() << "Cannot convert deformable convolution layer."; - std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' "; if (getParentEdges().size() != 3) @@ -764,10 +789,6 @@ void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { IE_THROW() << "Deformable convolution layer. Unsupported mode. Only 4D blobs are supported as input."; } - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); - } - if (getParentEdgeAt(1)->getDims().ndims() != 4) { IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); } @@ -779,25 +800,6 @@ void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { if (getChildEdgeAt(0)->getDims().ndims() != 4) { IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); } - - bool isMerged = (!getMergeWith().empty()); - bool isGrouped = defConvLayer->_group != 1; - if (isMerged && isGrouped) - IE_THROW() << errorPrefix << "cannot be initialized: group splitted mode are used together with direct group specification."; - - group = defConvLayer->_group; - if (isMerged) { - group = getMergeWith().size() + 1; - } - - invertVectorCopyUtoI(defConvLayer->_stride, stride); - deformable_group = defConvLayer->_deformable_group; - for (int i = 1; i <= defConvLayer->_dilation.size(); i++) { - dilation.push_back(static_cast(defConvLayer->_dilation[defConvLayer->_dilation.size() - i] - 1)); - } - - auto allPads = getPaddings(*defConvLayer); - invertVectorCopyUtoI(allPads.begin, paddingL); } void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h index 9a1379ab2eb..2c9a77a8c96 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -65,9 +64,10 @@ struct jit_uni_def_conv_kernel { class MKLDNNDeformableConvolutionNode : public MKLDNNNode { public: - 
MKLDNNDeformableConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNDeformableConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNDeformableConvolutionNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void createPrimitive() override; void initSupportedPrimitiveDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp index 6764e49c8d9..a91946507d8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp @@ -4,10 +4,11 @@ #include "mkldnn_depth_to_space_node.h" -#include #include #include #include "common/tensor_desc_creator.h" +#include +#include #include #include @@ -20,43 +21,59 @@ using namespace mkldnn; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; -MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto depthToSpace = std::dynamic_pointer_cast(op); + if (!depthToSpace) { + errorMessage = "Only opset1 DepthToSpace operation is supported"; + return false; + } + const auto mode = depthToSpace->get_mode(); + if (!one_of(mode, ngraph::op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST, ngraph::op::v0::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST)) { + errorMessage = "Does not support mode: " + ngraph::as_string(mode); + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + const auto depthToSpace = std::dynamic_pointer_cast(op); + + const auto modeNgraph = depthToSpace->get_mode(); + if (modeNgraph == ngraph::op::v0::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST) { + mode = Mode::BLOCKS_FIRST; + } else if (modeNgraph == ngraph::op::v0::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST) { + mode = Mode::DEPTH_FIRST; + } else { + THROW_ERROR << "doesn't support mode: " << ngraph::as_string(modeNgraph); + } + + blockSize = depthToSpace->get_block_size(); + if (blockSize == 0) + THROW_ERROR << "has incorrect block_size parameter is zero!"; + + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNDepthToSpaceNode::getSupportedDescriptors() { - auto* depthToSpaceLayer = dynamic_cast(getCnnLayer().get()); - if (depthToSpaceLayer == nullptr) - THROW_ERROR << "cannot convert from CNN layer"; - - if (depthToSpaceLayer->insData[0].lock() == nullptr) - THROW_ERROR << "has nullable input data"; - - SizeVector srcDims = depthToSpaceLayer->insData[0].lock()->getTensorDesc().getDims(); + SizeVector srcDims = inDims[0].ToSizeVector(); if (srcDims.size() < 3) THROW_ERROR << "has incorrect number of input dimensions"; if (srcDims.size() > 5) THROW_ERROR << "doesn't support dimensions with rank greater than 5"; - if (depthToSpaceLayer->outData[0] == nullptr) - THROW_ERROR << "has nullable output data"; - - SizeVector dstDims = depthToSpaceLayer->outData[0]->getTensorDesc().getDims(); + SizeVector dstDims = outDims[0].ToSizeVector(); if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; - std::string modeString = depthToSpaceLayer->GetParamAsString("mode"); - if (modeString == "blocks_first") { - mode = Mode::BLOCKS_FIRST; - } else if (modeString == "depth_first") { - mode = Mode::DEPTH_FIRST; - } else { - THROW_ERROR << "doesn't support mode: " << modeString; - } - - blockSize = depthToSpaceLayer->GetParamAsUInt("block_size", 1); - if (blockSize == 0) - THROW_ERROR << "has incorrect block_size parameter is zero!"; - size_t nSpatialDims = srcDims.size() - 2; blockStep = static_cast(std::pow(blockSize, nSpatialDims)); if (srcDims[1] % blockStep) @@ -80,7 +97,7 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); auto srcDims = getParentEdgeAt(0)->getDims(); const size_t nDims = srcDims.ndims(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.h index 3ca5f8709a9..140c290e3a7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.h @@ -13,7 +13,7 @@ namespace MKLDNNPlugin { class MKLDNNDepthToSpaceNode : public MKLDNNNode { public: - MKLDNNDepthToSpaceNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNDepthToSpaceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNDepthToSpaceNode() 
override = default; void getSupportedDescriptors() override; @@ -22,6 +22,8 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: enum Mode { BLOCKS_FIRST = 0, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index fca94bf51d9..734605a3b72 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -4,7 +4,6 @@ #include "mkldnn_eltwise_node.h" -#include #include #include @@ -13,14 +12,23 @@ #include #include "mkldnn_extension_utils.h" -#include "mkldnn_quantize_node.h" +#include "mkldnn_fake_quantize_node.h" #include "mkldnn_pooling_node.h" +#include "mkldnn_input_node.h" +#include "common/cpu_convert.h" #include "emitters/jit_emitter.hpp" #include "emitters/jit_eltwise_emitters.hpp" #include "emitters/jit_mkldnn_emitters.hpp" #include "emitters/jit_bf16_emitters.hpp" #include +#include "utils/general_utils.h" + +#include "ngraph/ngraph.hpp" +#include +#include "ngraph_transformations/op/power_static.hpp" +#include "ngraph_transformations/op/leaky_relu.hpp" +#include "ngraph_transformations/op/swish_cpu.hpp" #include #include @@ -28,6 +36,7 @@ #include #include #include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -115,9 +124,9 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu for (int i = 0; i < eltwiseNode.getFusedWith().size(); i++) { if (eltwiseNode.getFusedWith()[i].get()->getType() == Eltwise) { post_op_emitters.push_back(create_eltwise_emitter(*eltwiseNode.getFusedWith()[i].get(), exec_prc)); - } else if (eltwiseNode.getFusedWith()[i].get()->getType() == Quantize) { - auto quantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); - quantizeNode->appendPostOps(post_ops); + } else if (eltwiseNode.getFusedWith()[i].get()->getType() == FakeQuantize) { + auto fakeQuantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); + fakeQuantizeNode->appendPostOps(post_ops); quantization_injectors.push_back(std::make_shared>( this, post_ops.get()->entry_[post_ops.len() - 1], vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias)); @@ -360,54 +369,49 @@ private: }; std::set get_supported_precisions(MKLDNNNode& node) { - auto& eltwiseNode = dynamic_cast(node); - std::set precisions; - OV_SWITCH(MKLDNNPlugin, SupportedPrecisions, precisions, eltwiseNode.getOpType(), - OV_CASE(Relu, jit_mkldnn_aux_emitter), - OV_CASE(Gelu, jit_mkldnn_aux_emitter), - OV_CASE(Elu, jit_mkldnn_aux_emitter), - OV_CASE(Tanh, jit_mkldnn_aux_emitter), - OV_CASE(Logistic, jit_mkldnn_aux_emitter), - OV_CASE(Square, jit_mkldnn_aux_emitter), - OV_CASE(Abs, jit_mkldnn_aux_emitter), - OV_CASE(Sqrt, jit_mkldnn_aux_emitter), - OV_CASE(Linear, jit_mkldnn_aux_emitter), - OV_CASE(BoundedRelu, jit_mkldnn_aux_emitter), - OV_CASE(SoftRelu, jit_mkldnn_aux_emitter), - OV_CASE(Relu6, jit_mkldnn_aux_emitter), - OV_CASE(Exp, jit_mkldnn_aux_emitter), - OV_CASE(Clamp, jit_mkldnn_aux_emitter), - OV_CASE(Swish, jit_mkldnn_aux_emitter), - OV_CASE(Hswish, jit_mkldnn_aux_emitter), - OV_CASE(Mish, jit_mkldnn_aux_emitter), - OV_CASE(Hsigmoid, jit_mkldnn_aux_emitter), - OV_CASE(Round, jit_mkldnn_aux_emitter), - OV_CASE(Add, jit_add_emitter), - OV_CASE(MulAdd, jit_mul_add_emitter), - OV_CASE(Subtract, jit_subtract_emitter), - OV_CASE(Multiply, 
jit_multiply_emitter), - OV_CASE(Divide, jit_divide_emitter), - OV_CASE(FloorMod, jit_floor_mod_emitter), - OV_CASE(Mod, jit_mod_emitter), - OV_CASE(Maximum, jit_maximum_emitter), - OV_CASE(Minimum, jit_minimum_emitter), - OV_CASE(SquaredDifference, jit_squared_difference_emitter), - OV_CASE(PowerDynamic, jit_power_dynamic_emitter), - OV_CASE(Equal, jit_equal_emitter), - OV_CASE(NotEqual, jit_not_equal_emitter), - OV_CASE(Greater, jit_greater_emitter), - OV_CASE(GreaterEqual, jit_greater_equal_emitter), - OV_CASE(Less, jit_less_emitter), - OV_CASE(LessEqual, jit_less_equal_emitter), - OV_CASE(LogicalAnd, jit_logical_and_emitter), - OV_CASE(LogicalOr, jit_logical_or_emitter), - OV_CASE(LogicalXor, jit_logical_xor_emitter), - OV_CASE(LogicalNot, jit_logical_not_emitter), - OV_CASE(PowerStatic, jit_power_static_emitter), - OV_CASE(Prelu, jit_prelu_emitter), - OV_CASE(Erf, jit_erf_emitter)); + OV_SWITCH(MKLDNNPlugin, SupportedPrecisions, precisions, node.getAlgorithm(), + OV_CASE(EltwiseRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseGelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseElu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseTanh, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSigmoid, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseAbs, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSqrt, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSoftRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseExp, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseClamp, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSwish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseHswish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseMish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseHsigmoid, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRoundHalfToEven, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRoundHalfAwayFromZero, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseAdd, jit_add_emitter), + OV_CASE(EltwiseMulAdd, jit_mul_add_emitter), + OV_CASE(EltwiseSubtract, jit_subtract_emitter), + OV_CASE(EltwiseMultiply, jit_multiply_emitter), + OV_CASE(EltwiseDivide, jit_divide_emitter), + OV_CASE(EltwiseFloorMod, jit_floor_mod_emitter), + OV_CASE(EltwiseMod, jit_mod_emitter), + OV_CASE(EltwiseMaximum, jit_maximum_emitter), + OV_CASE(EltwiseMinimum, jit_minimum_emitter), + OV_CASE(EltwiseSquaredDifference, jit_squared_difference_emitter), + OV_CASE(EltwisePowerDynamic, jit_power_dynamic_emitter), + OV_CASE(EltwiseEqual, jit_equal_emitter), + OV_CASE(EltwiseNotEqual, jit_not_equal_emitter), + OV_CASE(EltwiseGreater, jit_greater_emitter), + OV_CASE(EltwiseGreaterEqual, jit_greater_equal_emitter), + OV_CASE(EltwiseLess, jit_less_emitter), + OV_CASE(EltwiseLessEqual, jit_less_equal_emitter), + OV_CASE(EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(EltwiseLogicalOr, jit_logical_or_emitter), + OV_CASE(EltwiseLogicalXor, jit_logical_xor_emitter), + OV_CASE(EltwiseLogicalNot, jit_logical_not_emitter), + OV_CASE(EltwisePowerStatic, jit_power_static_emitter), + OV_CASE(EltwisePrelu, jit_prelu_emitter), + OV_CASE(EltwiseErf, jit_erf_emitter)); if (precisions.empty()) IE_THROW() << "Unsupported operation type for Eltwise emitter"; @@ -426,50 +430,47 @@ private: exec_prec }; - OV_SWITCH(MKLDNNPlugin, EltwiseEmitter, ctx, eltwiseNode.getOpType(), - OV_CASE(Relu, jit_mkldnn_aux_emitter), - OV_CASE(Gelu, jit_mkldnn_aux_emitter), - OV_CASE(Elu, jit_mkldnn_aux_emitter), - OV_CASE(Tanh, jit_mkldnn_aux_emitter), - OV_CASE(Logistic, jit_mkldnn_aux_emitter), - OV_CASE(Square, jit_mkldnn_aux_emitter), - OV_CASE(Abs, jit_mkldnn_aux_emitter), - OV_CASE(Sqrt, jit_mkldnn_aux_emitter), - OV_CASE(Linear, 
jit_mkldnn_aux_emitter), - OV_CASE(BoundedRelu, jit_mkldnn_aux_emitter), - OV_CASE(SoftRelu, jit_mkldnn_aux_emitter), - OV_CASE(Relu6, jit_mkldnn_aux_emitter), - OV_CASE(Exp, jit_mkldnn_aux_emitter), - OV_CASE(Clamp, jit_mkldnn_aux_emitter), - OV_CASE(Swish, jit_mkldnn_aux_emitter), - OV_CASE(Hswish, jit_mkldnn_aux_emitter), - OV_CASE(Mish, jit_mkldnn_aux_emitter), - OV_CASE(Hsigmoid, jit_mkldnn_aux_emitter), - OV_CASE(Round, jit_mkldnn_aux_emitter), - OV_CASE(Add, jit_add_emitter), - OV_CASE(MulAdd, jit_mul_add_emitter), - OV_CASE(Subtract, jit_subtract_emitter), - OV_CASE(Multiply, jit_multiply_emitter), - OV_CASE(Divide, jit_divide_emitter), - OV_CASE(FloorMod, jit_floor_mod_emitter), - OV_CASE(Mod, jit_mod_emitter), - OV_CASE(Maximum, jit_maximum_emitter), - OV_CASE(Minimum, jit_minimum_emitter), - OV_CASE(SquaredDifference, jit_squared_difference_emitter), - OV_CASE(PowerDynamic, jit_power_dynamic_emitter), - OV_CASE(Equal, jit_equal_emitter), - OV_CASE(NotEqual, jit_not_equal_emitter), - OV_CASE(Greater, jit_greater_emitter), - OV_CASE(GreaterEqual, jit_greater_equal_emitter), - OV_CASE(Less, jit_less_emitter), - OV_CASE(LessEqual, jit_less_equal_emitter), - OV_CASE(LogicalAnd, jit_logical_and_emitter), - OV_CASE(LogicalOr, jit_logical_or_emitter), - OV_CASE(LogicalXor, jit_logical_xor_emitter), - OV_CASE(LogicalNot, jit_logical_not_emitter), - OV_CASE(PowerStatic, jit_power_static_emitter), - OV_CASE(Prelu, jit_prelu_emitter), - OV_CASE(Erf, jit_erf_emitter)); + OV_SWITCH(MKLDNNPlugin, EltwiseEmitter, ctx, eltwiseNode.getAlgorithm(), + OV_CASE(EltwiseRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseGelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseElu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseTanh, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSigmoid, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseAbs, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSqrt, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSoftRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseExp, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseClamp, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSwish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseHswish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseMish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseHsigmoid, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRoundHalfToEven, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRoundHalfAwayFromZero, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseAdd, jit_add_emitter), + OV_CASE(EltwiseMulAdd, jit_mul_add_emitter), + OV_CASE(EltwiseSubtract, jit_subtract_emitter), + OV_CASE(EltwiseMultiply, jit_multiply_emitter), + OV_CASE(EltwiseDivide, jit_divide_emitter), + OV_CASE(EltwiseFloorMod, jit_floor_mod_emitter), + OV_CASE(EltwiseMod, jit_mod_emitter), + OV_CASE(EltwiseMaximum, jit_maximum_emitter), + OV_CASE(EltwiseMinimum, jit_minimum_emitter), + OV_CASE(EltwiseSquaredDifference, jit_squared_difference_emitter), + OV_CASE(EltwisePowerDynamic, jit_power_dynamic_emitter), + OV_CASE(EltwiseEqual, jit_equal_emitter), + OV_CASE(EltwiseNotEqual, jit_not_equal_emitter), + OV_CASE(EltwiseGreater, jit_greater_emitter), + OV_CASE(EltwiseGreaterEqual, jit_greater_equal_emitter), + OV_CASE(EltwiseLess, jit_less_emitter), + OV_CASE(EltwiseLessEqual, jit_less_equal_emitter), + OV_CASE(EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(EltwiseLogicalOr, jit_logical_or_emitter), + OV_CASE(EltwiseLogicalXor, jit_logical_xor_emitter), + OV_CASE(EltwiseLogicalNot, jit_logical_not_emitter), + OV_CASE(EltwisePowerStatic, jit_power_static_emitter), + OV_CASE(EltwisePrelu, jit_prelu_emitter), + 
OV_CASE(EltwiseErf, jit_erf_emitter)); if (!ctx.emitter) IE_THROW() << "Unsupported operation type for Eltwise emitter"; @@ -512,9 +513,7 @@ private: eltwise_post_op_idx++; } else { - auto quantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); - - bool do_dequantization = quantizeNode->getOpType() == QuantizeOpType::FakeQuantization; + bool do_dequantization = eltwiseNode.getFusedWith()[i]->getAlgorithm() == FQCommon; bool do_rounding = do_dequantization || jep_.dst_prc == Precision::FP32 || i != eltwiseNode.getFusedWith().size() - 1; int s_idx = vmm_dst.getIdx(); @@ -765,256 +764,206 @@ private: } }; -MKLDNNEltwiseNode::MKLDNNEltwiseNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) { -} +std::map&, MKLDNNEltwiseNode& node)>> MKLDNNEltwiseNode::initializers = { + {ngraph::op::v1::Add::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseAdd; + }}, + {ngraph::op::v1::Subtract::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseSubtract; + }}, + {ngraph::op::v1::Multiply::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseMultiply; + }}, + {ngraph::op::v1::Divide::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseDivide; + }}, + {ngraph::op::v0::SquaredDifference::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseSquaredDifference; + }}, + {ngraph::op::v1::Maximum::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseMaximum; + }}, + {ngraph::op::v1::Minimum::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseMinimum; + }}, + {ngraph::op::v1::Mod::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseMod; + }}, + {ngraph::op::v1::FloorMod::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseFloorMod; + }}, + {ngraph::op::v1::Power::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwisePowerDynamic; + }}, + {PowerStaticNode::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + auto powerStatic = getNgraphOpAs(op); + node.algorithm = EltwisePowerStatic; + node.alpha = powerStatic->get_power(); + node.beta = powerStatic->get_scale(); + node.gamma = powerStatic->get_shift(); + }}, + {ngraph::op::v1::Equal::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseEqual; + }}, + {ngraph::op::v1::NotEqual::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseNotEqual; + }}, + {ngraph::op::v1::Greater::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseGreater; + }}, + {ngraph::op::v1::GreaterEqual::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseGreaterEqual; + }}, + {ngraph::op::v1::Less::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseLess; + }}, + {ngraph::op::v1::LessEqual::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseLessEqual; + }}, + {ngraph::op::v1::LogicalAnd::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseLogicalAnd; + }}, + 
{ngraph::op::v1::LogicalOr::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseLogicalOr; + }}, + {ngraph::op::v1::LogicalXor::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseLogicalXor; + }}, + {ngraph::op::v1::LogicalNot::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseLogicalNot; + }}, + {ngraph::op::v0::Relu::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseRelu; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_relu; + }}, + {LeakyReluNode::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + auto leakyRelu = getNgraphOpAs(op); + node.algorithm = EltwiseRelu; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_relu; + node.alpha = leakyRelu->get_slope(); + node.beta = 0.0f; + }}, + {ngraph::op::v0::Gelu::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseGelu; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_gelu_erf; + }}, + {ngraph::op::v7::Gelu::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + auto gelu = getNgraphOpAs(op); + node.algorithm = EltwiseGelu; + ngraph::op::GeluApproximationMode approximationMode = gelu->get_approximation_mode(); + if (approximationMode == ngraph::op::GeluApproximationMode::ERF) + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_gelu_erf; + else if (approximationMode == ngraph::op::GeluApproximationMode::TANH) + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_gelu_tanh; + else + IE_THROW(NotImplemented) << "CPU Eltwise node doesn't support ngraph operation Gelu with approximation mode: " << approximationMode; + }}, + {ngraph::op::v0::Elu::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + auto eluOp = getNgraphOpAs(op); -InferenceEngine::details::caseless_map> -MKLDNNEltwiseNode::initializers = { - {"relu", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = activationLayer->GetParamAsFloat("negative_slope", 0.0f); - beta = 0.0f; - opType = Relu; - algorithm = mkldnn::algorithm::eltwise_relu; - }}, - {"gelu", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Gelu; - std::string approximationMode = activationLayer->GetParamAsString("approximation_mode", "erf"); - if (approximationMode == "erf") - algorithm = mkldnn::algorithm::eltwise_gelu_erf; - else if (approximationMode == "tanh") - algorithm = mkldnn::algorithm::eltwise_gelu_tanh; - else - IE_THROW() << "Gelu layer with name " << activationLayer->name << " doesn't support approximation mode " << approximationMode; - }}, - {"elu", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = activationLayer->GetParamAsFloat("alpha", 1.0f); - beta = 0.0f; - opType = Elu; - algorithm = mkldnn::algorithm::eltwise_elu; - }}, - {"tanh", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Tanh; - algorithm = mkldnn::algorithm::eltwise_tanh; - }}, - {"sigmoid", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Logistic; - algorithm = mkldnn::algorithm::eltwise_logistic; - 
}}, - {"logistic", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Logistic; - algorithm = mkldnn::algorithm::eltwise_logistic; - }}, - {"square", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Square; - algorithm = mkldnn::algorithm::eltwise_square; - }}, - {"abs", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Abs; - algorithm = mkldnn::algorithm::eltwise_abs; - }}, - {"sqrt", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Sqrt; - algorithm = mkldnn::algorithm::eltwise_sqrt; - }}, - {"linear", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = activationLayer->GetParamAsFloat("alpha", 1.0f); - beta = activationLayer->GetParamAsFloat("beta", 0.0f); - opType = Linear; - algorithm = mkldnn::algorithm::eltwise_linear; - }}, - {"bounded_relu", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = activationLayer->GetParamAsFloat("alpha", 0.0f); - beta = 0.0f; - opType = BoundedRelu; - algorithm = mkldnn::algorithm::eltwise_bounded_relu; - }}, - {"softplus", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = SoftRelu; - algorithm = mkldnn::algorithm::eltwise_soft_relu; - }}, - {"relu6", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = activationLayer->GetParamAsFloat("n", 6.0f); - beta = 0.0f; - opType = Relu6; - algorithm = mkldnn::algorithm::eltwise_bounded_relu; - }}, - {"clamp", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = activationLayer->GetParamAsFloat("min", 1.0f); - beta = activationLayer->GetParamAsFloat("max", 0.0f); - opType = Clamp; - algorithm = mkldnn::algorithm::eltwise_clip; - }}, - {"exp", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Exp; - algorithm = mkldnn::algorithm::eltwise_exp; - }}, - {"not", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = LogicalNot; - }}, - {"swish", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = activationLayer->GetParamAsFloat("alpha", 1.0f); - beta = 0.0f; - opType = Swish; - algorithm = mkldnn::algorithm::eltwise_swish; - }}, - {"hswish", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Hswish; - algorithm = mkldnn::algorithm::eltwise_hswish; - }}, - {"mish", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Mish; - algorithm = mkldnn::algorithm::eltwise_mish; - }}, - {"hsigmoid", [](GenericLayer* 
activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Hsigmoid; - algorithm = mkldnn::algorithm::eltwise_hsigmoid; - }}, - {"round", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Round; - std::string mode = activationLayer->GetParamAsString("mode", "half_to_even"); - if (mode == "half_to_even") - algorithm = mkldnn::algorithm::eltwise_round_half_to_even; - else if (mode == "half_away_from_zero") - algorithm = mkldnn::algorithm::eltwise_round_half_away_from_zero; - else - IE_THROW() << "Round layer with name " << activationLayer->name << " doesn't support mode " << mode; - }}, - {"erf", [](GenericLayer* activationLayer, EltwiseOpType& opType, mkldnn::algorithm& algorithm, float& alpha, float& beta) { - alpha = 0.0f; - beta = 0.0f; - opType = Erf; - }}, + node.alpha = static_cast(eluOp->get_alpha()); + node.algorithm = EltwiseElu; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_elu; + }}, + {ngraph::op::v0::Tanh::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseTanh; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_tanh; + }}, + {ngraph::op::v0::Sigmoid::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseSigmoid; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_logistic; + }}, + {ngraph::op::v0::Abs::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseAbs; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_abs; + }}, + {ngraph::op::v0::Sqrt::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseSqrt; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_sqrt; + }}, + {ngraph::op::v0::Clamp::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + auto clampOp = getNgraphOpAs(op); + + node.alpha = static_cast(clampOp->get_min()); + node.beta = static_cast(clampOp->get_max()); + node.algorithm = EltwiseClamp; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_clip; + }}, + {ngraph::op::v0::Exp::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseExp; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_exp; + }}, + {SwishNode::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + auto swishOp = getNgraphOpAs(op); + node.algorithm = EltwiseSwish; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_swish; + node.alpha = swishOp->get_alpha(); + }}, + {ngraph::op::v4::HSwish::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseHswish; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_hswish; + }}, + {ngraph::op::v4::Mish::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseMish; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_mish; + }}, + {ngraph::op::v5::HSigmoid::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseHsigmoid; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_hsigmoid; + }}, + {ngraph::op::v5::Round::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + auto roundOp = getNgraphOpAs(op); + + switch (roundOp->get_mode()) { + case ngraph::op::v5::Round::RoundMode::HALF_TO_EVEN: + node.algorithm = EltwiseRoundHalfToEven; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_round_half_to_even; + 
break; + case ngraph::op::v5::Round::RoundMode::HALF_AWAY_FROM_ZERO: + node.algorithm = EltwiseRoundHalfAwayFromZero; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_round_half_away_from_zero; + break; + } + }}, + {ngraph::op::v0::PRelu::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwisePrelu; + }}, + {ngraph::op::v0::Erf::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseErf; + }}, + {ngraph::op::v4::SoftPlus::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseSoftRelu; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_soft_relu; + }}, }; -void MKLDNNEltwiseNode::init() { - InferenceEngine::details::CaselessEq comparator; - auto layerType = getCnnLayer().get()->type; - - auto * eltwiseLayer = dynamic_cast(getCnnLayer().get()); - if (eltwiseLayer) { - if (!eltwiseLayer->coeff.empty()) - IE_THROW() << "Eltwise node with name `" << getName() << "` doesn't support input coefficients."; - - switch (eltwiseLayer->_operation) { - case EltwiseLayer::Sum: eltwiseOp = Add; break; - case EltwiseLayer::Prod: eltwiseOp = Multiply; break; - case EltwiseLayer::Max: eltwiseOp = Maximum; break; - case EltwiseLayer::Sub: eltwiseOp = Subtract; break; - case EltwiseLayer::Min: eltwiseOp = Minimum; break; - case EltwiseLayer::Div: eltwiseOp = Divide; break; - case EltwiseLayer::Squared_diff: eltwiseOp = SquaredDifference; break; - case EltwiseLayer::Floor_mod: eltwiseOp = FloorMod; break; - case EltwiseLayer::Pow: eltwiseOp = PowerDynamic; break; - case EltwiseLayer::Equal: eltwiseOp = Equal; break; - case EltwiseLayer::Not_equal: eltwiseOp = NotEqual; break; - case EltwiseLayer::Greater: eltwiseOp = Greater; break; - case EltwiseLayer::Greater_equal: eltwiseOp = GreaterEqual; break; - case EltwiseLayer::Less: eltwiseOp = Less; break; - case EltwiseLayer::Less_equal: eltwiseOp = LessEqual; break; - case EltwiseLayer::Logical_AND: eltwiseOp = LogicalAnd; break; - case EltwiseLayer::Logical_OR: eltwiseOp = LogicalOr; break; - case EltwiseLayer::Logical_XOR: eltwiseOp = LogicalXor; break; - default: IE_THROW() << "Unsupported algorithm for Eltwise node with name `" << getName() << "`."; - } - } else if (comparator(layerType, "mod")) { - eltwiseOp = Mod; - } else if (comparator(layerType, "power")) { - eltwiseOp = PowerStatic; - - auto *powerLayer = dynamic_cast(getCnnLayer().get()); - if (powerLayer == nullptr) - IE_THROW() << "Cannot convert power layer."; - - alpha = powerLayer->power; - beta = powerLayer->scale; - gamma = powerLayer->offset; - } else if (comparator(layerType, "scaleshift")) { - if (getCnnLayer().get()->blobs.size() == 2) { - eltwiseOp = MulAdd; - eltwiseAlgorithm = mkldnn::algorithm::depthwise_scale_shift; - } else { - eltwiseOp = Multiply; - } - } else if (comparator(layerType, "prelu")) { - eltwiseOp = Prelu; - eltwiseAlgorithm = mkldnn::algorithm::depthwise_prelu; - } else if (comparator(layerType, "activation") && initializers.find(getCnnLayer().get()->GetParamAsString("type")) != initializers.end()) { - initializers[getCnnLayer().get()->GetParamAsString("type")](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta); - } else if (comparator(layerType, "relu") || - comparator(layerType, "gelu") || - comparator(layerType, "elu") || - comparator(layerType, "sigmoid") || - comparator(layerType, "logistic") || - comparator(layerType, "tanh") || - comparator(layerType, "relu6") || - comparator(layerType, "exp") || - comparator(layerType, "not") || - 
comparator(layerType, "clamp") || - comparator(layerType, "swish") || - comparator(layerType, "hswish") || - comparator(layerType, "mish") || - comparator(layerType, "hsigmoid") || - comparator(layerType, "round") || - comparator(layerType, "softplus")) { - initializers[layerType](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta); - } else if (comparator(layerType, "erf")) { - eltwiseOp = Erf; +MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + if (initializers.find(op->get_type_info()) != initializers.end()) { + initializers[op->get_type_info()](op, *this); } else { - IE_THROW() << "Unsupported algorithm for Eltwise node with name `" << getName() << "`."; + IE_THROW(NotImplemented) + << "CPU Eltwise node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); } } size_t MKLDNNEltwiseNode::getOpInputsNum() const { - switch (getOpType()) { - case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case PowerStatic: - case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: - case Mish: case Hsigmoid: case Round: - case LogicalNot: - case Erf: + switch (getAlgorithm()) { + case EltwiseRelu: case EltwiseGelu: case EltwiseElu: case EltwiseTanh: case EltwiseSigmoid: case EltwiseAbs: case EltwiseSqrt: + case EltwiseSoftRelu: case EltwiseExp: case EltwiseClamp: case EltwiseErf: case EltwiseLogicalNot: case EltwisePowerStatic: + case EltwiseSwish: case EltwiseHswish: case EltwiseMish: case EltwiseHsigmoid: case EltwiseRoundHalfToEven: case EltwiseRoundHalfAwayFromZero: return 1; - case Add: case Subtract: case Multiply: case Divide: case FloorMod: case Mod: case Maximum: case Minimum: case SquaredDifference: - case PowerDynamic: case Equal: case NotEqual: case Greater: case GreaterEqual: case Less: case LessEqual: case LogicalAnd: - case LogicalOr: case LogicalXor: case Prelu: + case EltwiseAdd: case EltwiseSubtract: case EltwiseMultiply: case EltwiseDivide: case EltwiseFloorMod: case EltwiseMod: case EltwiseMaximum: + case EltwiseMinimum: case EltwiseSquaredDifference: case EltwisePowerDynamic: case EltwiseEqual: case EltwiseNotEqual: case EltwiseGreater: + case EltwiseGreaterEqual: case EltwiseLess: case EltwiseLessEqual: case EltwiseLogicalAnd: case EltwiseLogicalOr: case EltwiseLogicalXor: + case EltwisePrelu: return 2; - case MulAdd: + case EltwiseMulAdd: return 3; default: IE_THROW() << "Unsupported operation for Eltwise node with name `" << getName() << "`."; } } -bool MKLDNNEltwiseNode::isSum() { - return eltwiseOp == Add; -} - bool MKLDNNEltwiseNode::isWithBroadcast() { auto oDims = outDims[0].ToSizeVector(); for (size_t i = 0; i < inDims.size(); i++) { @@ -1065,14 +1014,14 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { << " (actual = " << getParentEdges().size() << ")"; std::vector inputPrecisions; - for (int i = 0; i < getCnnLayer()->insData.size(); i++) { - inputPrecisions.push_back(getCnnLayer()->insData[i].lock()->getPrecision()); + for (const auto &i : getOriginalInputPrecisions()) { + inputPrecisions.push_back(i); } for (auto& fusedNode : fusedWith) { if (fusedNode->getType() == Eltwise) { - for (int i = 1; i < fusedNode->getCnnLayer()->insData.size(); i++) { - inputPrecisions.push_back(fusedNode->getCnnLayer()->insData[i].lock()->getPrecision()); + for (int i = 1; i < fusedNode->getOriginalInputsNumber(); i++) { + 
inputPrecisions.push_back(fusedNode->getOriginalInputPrecisionAtPort(i)); } } } @@ -1080,12 +1029,9 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { if (inputPrecisions.size() != getParentEdges().size()) IE_THROW() << "Eltwise node with name `" << getName() << "` has invalid input precisions configuration."; - InferenceEngine::Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision(); + InferenceEngine::Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } + outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } if (!mayiuse(avx512_core)) { @@ -1119,9 +1065,10 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { // TODO: delete after new LPT (ngraph based) is merged // WA is needed to handle bug in LPT that produces wrong precision after average pooling (I8/U8 instead of FP32) - if (eltwiseOp == MulAdd && (inputPrecisions[0] == Precision::U8 || inputPrecisions[0] == Precision::I8)) { - auto poolingLayer = dynamic_cast(getParentEdgesAtPort(0)[0]->getParent()->getCnnLayer().get()); - if (poolingLayer && poolingLayer->_type == PoolingLayer::AVG) { + if ((getAlgorithm() == EltwiseMulAdd || getAlgorithm() == EltwisePowerStatic) && + (inputPrecisions[0] == Precision::U8 || inputPrecisions[0] == Precision::I8)) { + auto parentNode = getParentEdgesAtPort(0)[0]->getParent(); + if (getParentEdgesAtPort(0)[0]->getParent()->getAlgorithm() == PoolingAvg) { inputPrecisions[0] = Precision::FP32; } } @@ -1134,7 +1081,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { auto initDesc = [&] (LayoutType lt) -> PrimitiveDescInfo { auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> TensorDesc { - if (lt == ChannelsFirst) { + if (lt == ChannelsFirst && edge->getDims().ndims() != 1) { auto dims = edge->getDims().ToSizeVector(); auto ndims = dims.size(); std::vector order(ndims); @@ -1150,7 +1097,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { } return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); - } else if (lt == Blocked && edge->getDims()[1] != 1) { + } else if (lt == Blocked && edge->getDims().ndims() != 1 && edge->getDims()[1] != 1) { size_t blockSize = mayiuse(x64::avx512_common) ? 16 : 8; std::vector blocks = edge->getDims().ToSizeVector(); @@ -1180,7 +1127,6 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { dataConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 
0 : -1; dataConfig.constant = false; - dataConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); config.inConfs.push_back(dataConfig); @@ -1211,13 +1157,15 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { bool isChannelsFirstApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 1, 2, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 1, 2, 4, 5); - isChannelsFirstApplicable = isChannelsFirstApplicable && getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims(); + isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getParentEdgeAt(i)->getDims().ndims() != 1, + getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims()); } - bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 4, 5); + bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 1, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 4, 5); - isBlockedApplicable = isBlockedApplicable && getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims(); + isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 1, 4, 5); + isBlockedApplicable = isBlockedApplicable && implication(getParentEdgeAt(i)->getDims().ndims() != 1, + getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims()); } if (isChannelsFirstApplicable) @@ -1339,7 +1287,7 @@ void MKLDNNEltwiseNode::createPrimitive() { auto outOrder = config.outConfs[0].desc.getBlockingDesc().getOrder(); size_t oc_size = 0; offsets_oc.resize(tensorRank, 0); - if (isFusedWith(Quantize)) { + if (isFusedWith(FakeQuantize)) { size_t offset_oc = 1; for (int i = outOrder.size() - 1; i >= 0; i--) { if (outOrder[i] == 1) { @@ -1409,7 +1357,7 @@ void MKLDNNEltwiseNode::createPrimitive() { } collapseLastDims(dims_out, 1); - if (isFusedWith(Quantize)) { + if (isFusedWith(FakeQuantize)) { collapseLastOffsets(offsets_oc, 1); } } else { @@ -1616,8 +1564,8 @@ void MKLDNNEltwiseNode::executeReference(const std::vector& src size_t inputNum = src_ptrs.size(); std::shared_ptr ref_eltwise_injector = nullptr; - if (eltwiseAlgorithm != mkldnn::algorithm::undef) { - ref_eltwise_injector = std::make_shared(static_cast(eltwiseAlgorithm), alpha, beta, 1.f); + if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + ref_eltwise_injector = std::make_shared(static_cast(getMKLDNNAlgorithm()), alpha, beta, 1.f); } parallel_nt(0, [&](const int ithr, const int nthr) { @@ -1652,34 +1600,34 @@ void MKLDNNEltwiseNode::executeReference(const std::vector& src } float* dst_ptr_f = reinterpret_cast(dst_ptr + index_out); - switch (getOpType()) { - case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: - case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: - case Mish: case Hsigmoid: case Round: + switch (getAlgorithm()) { + case EltwiseRelu: case EltwiseGelu: case EltwiseElu: case EltwiseTanh: case EltwiseSigmoid: case EltwiseAbs: + case EltwiseSqrt: case EltwiseSoftRelu: case EltwiseExp: case EltwiseClamp: + case EltwiseSwish: case EltwiseHswish: case EltwiseMish: case EltwiseHsigmoid: case EltwiseRoundHalfToEven: case EltwiseRoundHalfAwayFromZero: *dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); break; - case 
Add: *dst_ptr_f = src_f[0] + src_f[1]; break; - case MulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break; - case Subtract: *dst_ptr_f = src_f[0] - src_f[1]; break; - case Multiply: *dst_ptr_f = src_f[0] * src_f[1]; break; - case Divide: *dst_ptr_f = src_f[0] / src_f[1]; break; - case FloorMod: *dst_ptr_f = src_f[0] - floorf(src_f[0] / src_f[1]) * src_f[1]; break; - case Mod: *dst_ptr_f = src_f[0] - truncf(src_f[0] / src_f[1]) * src_f[1]; break; - case Maximum: *dst_ptr_f = std::max(src_f[0], src_f[1]); break; - case Minimum: *dst_ptr_f = std::min(src_f[0], src_f[1]); break; - case SquaredDifference: *dst_ptr_f = powf((src_f[0] - src_f[1]), 2.f); break; - case PowerDynamic: *dst_ptr_f = powf(src_f[0], src_f[1]); break; - case Equal: *dst_ptr_f = src_f[0] == src_f[1]; break; - case NotEqual: *dst_ptr_f = src_f[0] != src_f[1]; break; - case Greater: *dst_ptr_f = src_f[0] > src_f[1]; break; - case GreaterEqual: *dst_ptr_f = src_f[0] >= src_f[1]; break; - case Less: *dst_ptr_f = src_f[0] < src_f[1]; break; - case LessEqual: *dst_ptr_f = src_f[0] <= src_f[1]; break; - case LogicalAnd: *dst_ptr_f = src_f[0] && src_f[1]; break; - case LogicalOr: *dst_ptr_f = src_f[0] || src_f[1]; break; - case LogicalXor: *dst_ptr_f = (src_f[0] || src_f[1]) - (src_f[0] && src_f[1]); break; - case LogicalNot: *dst_ptr_f = !src_f[0]; break; - case PowerStatic: *dst_ptr_f = powf(beta * src_f[0] + gamma, alpha); break; - case Prelu: *dst_ptr_f = src_f[0] > 0 ? src_f[0] : src_f[0] * src_f[1]; break; + case EltwiseAdd: *dst_ptr_f = src_f[0] + src_f[1]; break; + case EltwiseMulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break; + case EltwiseSubtract: *dst_ptr_f = src_f[0] - src_f[1]; break; + case EltwiseMultiply: *dst_ptr_f = src_f[0] * src_f[1]; break; + case EltwiseDivide: *dst_ptr_f = src_f[0] / src_f[1]; break; + case EltwiseFloorMod: *dst_ptr_f = src_f[0] - floorf(src_f[0] / src_f[1]) * src_f[1]; break; + case EltwiseMod: *dst_ptr_f = src_f[0] - truncf(src_f[0] / src_f[1]) * src_f[1]; break; + case EltwiseMaximum: *dst_ptr_f = std::max(src_f[0], src_f[1]); break; + case EltwiseMinimum: *dst_ptr_f = std::min(src_f[0], src_f[1]); break; + case EltwiseSquaredDifference: *dst_ptr_f = powf((src_f[0] - src_f[1]), 2.f); break; + case EltwisePowerDynamic: *dst_ptr_f = powf(src_f[0], src_f[1]); break; + case EltwiseEqual: *dst_ptr_f = src_f[0] == src_f[1]; break; + case EltwiseNotEqual: *dst_ptr_f = src_f[0] != src_f[1]; break; + case EltwiseGreater: *dst_ptr_f = src_f[0] > src_f[1]; break; + case EltwiseGreaterEqual: *dst_ptr_f = src_f[0] >= src_f[1]; break; + case EltwiseLess: *dst_ptr_f = src_f[0] < src_f[1]; break; + case EltwiseLessEqual: *dst_ptr_f = src_f[0] <= src_f[1]; break; + case EltwiseLogicalAnd: *dst_ptr_f = src_f[0] && src_f[1]; break; + case EltwiseLogicalOr: *dst_ptr_f = src_f[0] || src_f[1]; break; + case EltwiseLogicalXor: *dst_ptr_f = (src_f[0] || src_f[1]) - (src_f[0] && src_f[1]); break; + case EltwiseLogicalNot: *dst_ptr_f = !src_f[0]; break; + case EltwisePowerStatic: *dst_ptr_f = powf(beta * src_f[0] + gamma, alpha); break; + case EltwisePrelu: *dst_ptr_f = src_f[0] > 0 ? 
src_f[0] : src_f[0] * src_f[1]; break; default: IE_THROW() << "Unsupported operation type for Eltwise node with name `" << getName() << "`"; } } @@ -1737,79 +1685,144 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { return getParentEdgesAtPort(0)[0].get()->getDims() == getChildEdgesAtPort(0)[0].get()->getDims(); } -void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) { +void MKLDNNEltwiseNode::fillScalesAndShifts(const MKLDNNNode *parentNode) { + const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector& buffer) { + auto *constInputNode = dynamic_cast(constInput.get()); + auto constBlob = constInputNode->getConstBlob(); + auto srtPtr = constBlob->cbuffer().as(); + buffer.resize(constBlob->size()); + cpu_convert(srtPtr, &buffer[0], constBlob->getTensorDesc().getPrecision(), Precision::FP32, constBlob->size()); + }; + + const size_t constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0; + + if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) { + fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales); + } else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) { + fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts); + } else if (one_of(getAlgorithm(), EltwiseMulAdd)) { + fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales); + fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts); + } else if (one_of(getAlgorithm(), EltwisePowerStatic)) { + const auto power = dynamic_cast(this); + if (!power) { + IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode"; + } + scales.push_back(power->getBeta()); + shifts.push_back(power->getGamma()); + } + + const size_t bufferSize = static_cast(outDims[0][outDims[0].ndims() > 1 ? 1 : 0]); + const size_t bufferSizeAligned = rnd_up(bufferSize, 16); + + size_t initSize = scales.size(); + if (initSize > 0) { + scales.resize(bufferSizeAligned, 0); + if (initSize == 1) { + std::fill(scales.begin() + 1, scales.begin() + bufferSize, scales[0]); + } + } + + initSize = shifts.size(); + if (initSize > 0) { + shifts.resize(bufferSizeAligned, 0); + if (initSize == 1) { + std::fill(shifts.begin() + 1, shifts.begin() + bufferSize, shifts[0]); + } + } + switch (getAlgorithm()) { - case mkldnn::algorithm::eltwise_relu: - case mkldnn::algorithm::eltwise_tanh: - case mkldnn::algorithm::eltwise_elu: - case mkldnn::algorithm::eltwise_square: - case mkldnn::algorithm::eltwise_abs: - case mkldnn::algorithm::eltwise_sqrt: - case mkldnn::algorithm::eltwise_linear: - case mkldnn::algorithm::eltwise_bounded_relu: - case mkldnn::algorithm::eltwise_soft_relu: - case mkldnn::algorithm::eltwise_logistic: - case mkldnn::algorithm::eltwise_exp: - case mkldnn::algorithm::eltwise_gelu_erf: - case mkldnn::algorithm::eltwise_gelu_tanh: - case mkldnn::algorithm::eltwise_clip: - case mkldnn::algorithm::eltwise_swish: - case mkldnn::algorithm::eltwise_hswish: - case mkldnn::algorithm::eltwise_mish: - case mkldnn::algorithm::eltwise_hsigmoid: - case mkldnn::algorithm::eltwise_round_half_to_even: - case mkldnn::algorithm::eltwise_round_half_away_from_zero: - ops.append_eltwise(1.0, getAlgorithm(), getAlpha(), getBeta()); + case EltwiseAdd: { + scales.resize(bufferSizeAligned, 1.0f); break; - case mkldnn::algorithm::depthwise_scale_shift: - case mkldnn::algorithm::depthwise_prelu: - if (scales.empty() && shifts.empty()) { - size_t bufferSize = static_cast(outDims[0][outDims[0].size() > 1 ? 
1 : 0]); - size_t bufferSizeAligned = rnd_up(bufferSize, 16); - - Blob::Ptr scalesBlob = getCnnLayer()->blobs["weights"]; - if (scalesBlob == nullptr) - IE_THROW() << "Cannot get weights blob in Eltwise node with name `" << getName() << "`"; - scales.resize(bufferSizeAligned, 0); - const float *scalesBufferPtr = scalesBlob->buffer().as(); - for (int i = 0; i < bufferSize; i++) { - scales[i] = scalesBufferPtr[scalesBlob->size() == 1 ? 0 : i]; - } - - Blob::Ptr shiftsBlob = getCnnLayer()->blobs["biases"]; - if (shiftsBlob != nullptr) { - shifts.resize(bufferSizeAligned, 0); - const float *shiftsBufferPtr = shiftsBlob->buffer().as(); - for (int i = 0; i < bufferSize; i++) { - shifts[i] = shiftsBufferPtr[shiftsBlob->size() == 1 ? 0 : i]; - } - } - } - - ops.append_depthwise(getAlgorithm(), &scales[0], shifts.empty() ? nullptr : &shifts[0]); + } + case EltwiseSubtract: { + scales.resize(bufferSizeAligned, 1.0f); + std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; }); break; - default: IE_THROW() << "Appending Eltwise node with name `" << getName() << "` as post operation is not supported"; + } + case EltwiseMultiply: { + shifts.resize(bufferSizeAligned, 0.0f); + break; + } + case EltwiseDivide: { + shifts.resize(bufferSizeAligned, 0.0f); + std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; }); + break; + } + default: break; + } +} + +void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) { + // Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API. + specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd && + getParentEdgesAtPort(0)[0]->getDims().ToSizeVector() == getParentEdgesAtPort(1)[0]->getDims().ToSizeVector(); + if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) { + fillScalesAndShifts(parentNode.get()); + } + MKLDNNNode::fuseInto(parentNode); +} + +void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) { + const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' "; + if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + switch (getMKLDNNAlgorithm()) { + case mkldnn::algorithm::eltwise_relu: + case mkldnn::algorithm::eltwise_tanh: + case mkldnn::algorithm::eltwise_elu: + case mkldnn::algorithm::eltwise_square: + case mkldnn::algorithm::eltwise_abs: + case mkldnn::algorithm::eltwise_sqrt: + case mkldnn::algorithm::eltwise_linear: + case mkldnn::algorithm::eltwise_bounded_relu: + case mkldnn::algorithm::eltwise_soft_relu: + case mkldnn::algorithm::eltwise_logistic: + case mkldnn::algorithm::eltwise_exp: + case mkldnn::algorithm::eltwise_gelu_erf: + case mkldnn::algorithm::eltwise_gelu_tanh: + case mkldnn::algorithm::eltwise_clip: + case mkldnn::algorithm::eltwise_swish: + case mkldnn::algorithm::eltwise_hswish: + case mkldnn::algorithm::eltwise_mish: + case mkldnn::algorithm::eltwise_hsigmoid: + case mkldnn::algorithm::eltwise_round_half_to_even: + case mkldnn::algorithm::eltwise_round_half_away_from_zero: + ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta()); + break; + default: IE_THROW() << errorPrefix << "as post operation is not supported"; + } + } else { + switch (getAlgorithm()) { + case EltwiseAdd: + case EltwiseSubtract: + case EltwiseMultiply: + case EltwiseDivide: + case EltwiseMulAdd: + case EltwisePowerStatic: + if (scales.empty() || shifts.empty()) + 
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; + ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scales[0], &shifts[0]); + break; + case EltwisePrelu: + if (scales.empty()) + IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; + ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scales[0], nullptr); + break; + default: IE_THROW() << errorPrefix << "as post operation is not supported"; + } } } bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { - auto isOneOf = [](EltwiseOpType alg, std::vector algs) { - for (auto a : algs) { - if (alg == a) { - return true; - } - } - return false; - }; - - auto isSuitableNode = [](const MKLDNNEltwiseNode* node) { + auto isSuitableNode = [this](const MKLDNNEltwiseNode* node) { // [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results // we disable its fusing otherwise there is no guarantee it will be executed it I32 // [TODO] We need to rewrite support for different precisions at all to avoid implicit conversions to FP32 // (all should be handled via explicit convert operations) - if (node->getOpType() == Divide) { - for (int i = 0; i < node->getCnnLayer()->insData.size(); i++) { - if (node->getCnnLayer()->insData[i].lock()->getPrecision() == Precision::I32) { + if (node->getAlgorithm() == EltwiseDivide) { + for (const auto &originalInputPrecision : getOriginalInputPrecisions()) { + if (originalInputPrecision == Precision::I32) { return false; } } @@ -1826,26 +1839,22 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { } // FQ inputs with quantization parameters will be hided inside post_op object, so will not increase inputs number - size_t addedInputEdgesNum = node->getType() != Quantize ? (node->getParentEdges().size() - 1) : 0; + size_t addedInputEdgesNum = node->getType() != FakeQuantize ? (node->getParentEdges().size() - 1) : 0; if (getParentEdges().size() + addedInputEdgesNum > MAX_ELTWISE_INPUTS) return false; if (node->getType() == Eltwise) { - auto eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode->getParentEdgesAtPort(0)[0]->getParent().get() != this) { - if (!isSuitableNode(this)) { - return false; - } - + if (node->getParentEdgesAtPort(0)[0]->getParent().get() != this) { // Eltwise jitter doesn't respect commutative property, so fusing is disabled in case it applied not for 0-th port. 
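// Illustrative note (not part of the original patch): because the fused input is assumed to feed the
// 0-th port, fusing a non-commutative operation through another port would effectively swap its operands,
// e.g. a Subtract fused via the 1-st port would compute src1 - src0 instead of src0 - src1.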
- if (isOneOf(eltwiseNode->getOpType(), {Subtract, Divide, FloorMod, Mod, PowerDynamic, Greater, GreaterEqual, Less, LessEqual})) { + if (one_of(node->getAlgorithm(), EltwiseSubtract, EltwiseDivide, EltwiseFloorMod, EltwiseMod, EltwisePowerDynamic, EltwiseGreater, + EltwiseGreaterEqual, EltwiseLess, EltwiseLessEqual, EltwiseMulAdd)) { return false; } // Limitation: inputs precision definition inside Eltwise node assumes fusing is applied for 0-th port, // otherwise we need identical precision on all inputs of fused node - for (int i = 1; i < eltwiseNode->getCnnLayer()->insData.size(); i++) { - if (eltwiseNode->getCnnLayer()->insData[0].lock()->getPrecision() != eltwiseNode->getCnnLayer()->insData[i].lock()->getPrecision()) { + for (int i = 1; i < getOriginalInputsNumber(); i++) { + if (getOriginalInputPrecisionAtPort(0) != getOriginalInputPrecisionAtPort(i)) { return false; } } @@ -1854,11 +1863,8 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { return true; } - if (node->getType() == Quantize) { - auto *quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - return !quantizeNode->isBinarization(); + if (node->getType() == FakeQuantize) { + return node->getAlgorithm() != FQBinarization; } return false; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h index d5ab2430fc2..8ae34000430 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h @@ -15,55 +15,6 @@ namespace MKLDNNPlugin { #define MAX_ELTWISE_INPUTS 7 -enum EltwiseOpType { - Add = 0, - Multiply, - Subtract, - Divide, - FloorMod, - Mod, - Maximum, - Minimum, - SquaredDifference, - PowerDynamic, - PowerStatic, - MulAdd, - - Equal, - NotEqual, - Greater, - GreaterEqual, - Less, - LessEqual, - - LogicalAnd, - LogicalOr, - LogicalXor, - LogicalNot, - - Relu, - Gelu, - Elu, - Tanh, - Logistic, - Square, - Abs, - Sqrt, - Linear, - BoundedRelu, - SoftRelu, - Relu6, - Exp, - Clamp, - Swish, - Prelu, - Mish, - Hswish, - Hsigmoid, - Round, - Erf -}; - struct jit_eltwise_params { size_t inputs_number; size_t input_size; @@ -108,7 +59,7 @@ struct jit_uni_eltwise_kernel { class MKLDNNEltwiseNode : public MKLDNNNode { public: - MKLDNNEltwiseNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNEltwiseNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNEltwiseNode() override = default; void getSupportedDescriptors() override; @@ -119,28 +70,21 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; bool canBeInPlace() const override; - - bool isSum(); - bool isWithBroadcast(); - - bool canFuse(const MKLDNNNodePtr& node) const; - - size_t getOpInputsNum() const; - EltwiseOpType getOpType() const { return eltwiseOp; } - mkldnn::algorithm getAlgorithm() const { return eltwiseAlgorithm; } + bool canFuse(const MKLDNNNodePtr& node) const override; + void appendPostOps(mkldnn::post_ops& ops) override; + void fuseInto(MKLDNNNodePtr& parentNode) override; + InferenceEngine::Precision getRuntimePrecision() const override; float getAlpha() const { return alpha; } float getBeta() const { return beta; } + float getGamma() const { return gamma; } + mkldnn::algorithm getMKLDNNAlgorithm() const { return mkldnnAlgorithm; } - void 
appendPostOps(mkldnn::post_ops& ops) override; - - InferenceEngine::Precision getRuntimePrecision() const override; + bool isWithBroadcast(); + bool isSpecialConvolutionAddFusing() const { return specialConvolutionAddFusing; } private: - void init() override; - - EltwiseOpType eltwiseOp = Add; - mkldnn::algorithm eltwiseAlgorithm = mkldnn::algorithm::undef; + mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef; std::shared_ptr eltwise_kernel = nullptr; jit_eltwise_params jep = {}; @@ -148,6 +92,7 @@ private: int optimalTensorRank = 6; bool canUseOptimizedImpl = false; bool isDynBatchEnabled = false; + bool specialConvolutionAddFusing = false; size_t batchDimIdx = 0; size_t tensorRank = 0; size_t fullWorkAmount = 0; @@ -167,6 +112,8 @@ private: std::vector scales = {}; std::vector shifts = {}; + static std::map&, MKLDNNEltwiseNode& node)>> initializers; + inline void executeOptimized6D(const std::vector& src_ptrs, uint8_t *dst_ptr); inline void executeOptimizedGeneric(const std::vector& src_ptrs, uint8_t *dst_ptr); inline void executeReference(const std::vector& src_ptrs, uint8_t *dst_ptr); @@ -174,8 +121,9 @@ private: void offset_out_calc(std::vector& offset, std::vector& dims); void offset_in_calc(std::vector& offset, std::vector& dims_in, std::vector& dims_out); - static InferenceEngine::details::caseless_map> initializers; + size_t getOpInputsNum() const; + + void fillScalesAndShifts(const MKLDNNNode *parentNode); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp new file mode 100644 index 00000000000..c8810e4444b --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp @@ -0,0 +1,132 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include "mkldnn_embedding_bag_offset_sum_node.h" +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNEmbeddingBagOffsetSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto embBagOffsetSumOp = ngraph::as_type_ptr(op); + if (!embBagOffsetSumOp) { + errorMessage = "Node is not an instance of the EmbeddingBagOffsetsSum operation from opset v3."; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNEmbeddingBagOffsetSumNode::MKLDNNEmbeddingBagOffsetSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), MKLDNNEmbeddingBagSumNode(op, 3lu, 1lu, 4lu, 3lu) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + if (op->get_input_shape(INDICES_IDX).size() != 1) + IE_THROW() << "'" << _layerName << "' layer has indices data with invalid shape."; + + if (op->get_input_shape(OFFSETS_IDX).size() != 1) + IE_THROW() << "'" << _layerName << "' layer's offsets data has invalid shape."; + + _indicesLen = op->get_input_shape(INDICES_IDX)[0]; + _offsetsLen = op->get_input_shape(OFFSETS_IDX)[0]; +} + +void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + std::string logPrefix = std::string("Layer EmbeddingBagSum with name '") + _layerName + "' "; + static const std::set supportedPrecisions = + {Precision::FP32, Precision::I8, Precision::U8, Precision::I32}; + + auto inDataPrecision = getOriginalInputPrecisionAtPort(EMB_TABLE_IDX); + if (inDataPrecision == Precision::BF16) + inDataPrecision = Precision::FP32; + if (!supportedPrecisions.empty()) { + if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) + IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); + } else { + static const std::set defaultSupportedPrecisions = + {Precision::FP32, Precision::I8, Precision::U8, Precision::I32}; + if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) + IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); + } + + std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, + {TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}); + if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) + inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) + inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + + addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); +} + +void MKLDNNEmbeddingBagOffsetSumNode::initFromInputs() { + indicesData_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); + offsetsData_ = reinterpret_cast(getParentEdgeAt(OFFSETS_IDX)->getMemoryPtr()->GetPtr()); + + if (getParentEdges().size() > DEFAULT_INDEX_IDX) { + defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->GetPtr()); + } +} + +void MKLDNNEmbeddingBagOffsetSumNode::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { + if (embIndex >= _offsetsLen) { + IE_THROW() << "Invalid embedding bag index."; + } + if (offsetsData_[embIndex] >= _indicesLen) { + IE_THROW() << "Offset value exceeds indices size."; + } + + indices = nullptr; + size = 0lu; + withWeight = _withWeights; + + if (embIndex == _offsetsLen - 1lu) + size = _indicesLen - offsetsData_[embIndex]; + else + size = offsetsData_[embIndex + 1lu] - offsetsData_[embIndex]; + + if (size != 0lu) { + indices = indicesData_ + offsetsData_[embIndex]; + } else { + // Empty or default bag + withWeight = false; + if (defaultIndices_) { + indices = defaultIndices_; + size = 1lu; + } 
+ return; + } + + if (withWeight) + weightsIdx = offsetsData_[embIndex]; +} + +void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) { + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* weightsData = nullptr; + if (_withWeights) + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); +} + +bool MKLDNNEmbeddingBagOffsetSumNode::created() const { + return getType() == EmbeddingBagOffsetsSum; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNEmbeddingBagOffsetSumNode, EmbeddingBagOffsetsSum) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.h new file mode 100644 index 00000000000..4ae3d331822 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.h @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "mkldnn_embedding_bag_sum_node.h" +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNEmbeddingBagOffsetSumNode : public MKLDNNNode, public MKLDNNEmbeddingBagSumNode { +public: + MKLDNNEmbeddingBagOffsetSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNEmbeddingBagOffsetSumNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + void initFromInputs() override; + void getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) override; + + const size_t OFFSETS_IDX = 2lu; + + const int* indicesData_ = nullptr; + const int* offsetsData_ = nullptr; + const int* defaultIndices_ = nullptr; + + size_t _indicesLen; + size_t _offsetsLen; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp new file mode 100644 index 00000000000..4d1b808b502 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp @@ -0,0 +1,99 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include "mkldnn_embedding_bag_packed_sum_node.h" +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNEmbeddingBagPackedSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto embBagPackedSumOp = ngraph::as_type_ptr(op); + if (!embBagPackedSumOp) { + errorMessage = "Node is not an instance of the EmbeddingBagPackedSum operation from opset v3."; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNEmbeddingBagPackedSumNode::MKLDNNEmbeddingBagPackedSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), MKLDNNEmbeddingBagSumNode(op, 2lu, 1lu, 2lu, 3lu) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + if (op->get_input_shape(INDICES_IDX).size() != 2) + IE_THROW() << "'" << _layerName << "' layer has indices data with invalid shape."; + _batch = op->get_input_shape(INDICES_IDX)[0]; + _indicesPerBag = op->get_input_shape(INDICES_IDX)[1]; +} + +void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + std::string logPrefix = std::string("Layer EmbeddingBagSum with name '") + _layerName + "' "; + static const std::set supportedPrecisions = + {Precision::FP32, Precision::I8, Precision::U8, Precision::I32}; + + auto inDataPrecision = getOriginalInputPrecisionAtPort(EMB_TABLE_IDX); + if (inDataPrecision == Precision::BF16) + inDataPrecision = Precision::FP32; + if (!supportedPrecisions.empty()) { + if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) + IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); + } else { + static const std::set defaultSupportedPrecisions = + {Precision::FP32, Precision::I8, Precision::U8, Precision::I32}; + if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) + IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); + } + + std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}); + if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) + inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + + addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); +} + +void MKLDNNEmbeddingBagPackedSumNode::initFromInputs() { + _indices = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); +} + +void MKLDNNEmbeddingBagPackedSumNode::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { + if (embIndex >= _batch * _indicesPerBag) + IE_THROW() << "Invalid embedding bag index."; + + withWeight = true; + + indices = _indices + embIndex * _indicesPerBag; + size = _indicesPerBag; + + weightsIdx = embIndex * _indicesPerBag; +} + +void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) { + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* weightsData = nullptr; + if (_withWeights) + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); +} + +bool MKLDNNEmbeddingBagPackedSumNode::created() const { + return getType() == EmbeddingBagPackedSum; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNEmbeddingBagPackedSumNode, EmbeddingBagPackedSum) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.h new file mode 100644 index 00000000000..c83d4fdef0c --- 
/dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.h @@ -0,0 +1,38 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "mkldnn_embedding_bag_sum_node.h" +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNEmbeddingBagPackedSumNode : public MKLDNNNode, public MKLDNNEmbeddingBagSumNode { +public: + MKLDNNEmbeddingBagPackedSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNEmbeddingBagPackedSumNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + void initFromInputs() override; + void getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) override; + + const int* _indices; + size_t _batch = 0; + size_t _indicesPerBag = 0; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp new file mode 100644 index 00000000000..8abeee76d76 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp @@ -0,0 +1,141 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "mkldnn_embedding_bag_sum_node.h" +#include +#include "common/cpu_memcpy.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +MKLDNNEmbeddingBagSumNode::MKLDNNEmbeddingBagSumNode( + const std::shared_ptr& op, + size_t requiredInputNum, + size_t indicesIdx, + size_t perSampleWeightsIdx, + size_t defaultIndexIdx) : + INDICES_IDX(indicesIdx), + PER_SAMPLE_WEIGHTS_IDX(perSampleWeightsIdx), + DEFAULT_INDEX_IDX(defaultIndexIdx) { + _layerName = op->get_friendly_name(); + std::string logPrefix = std::string("Layer EmbeddingBagSum with name '") + _layerName + "' "; + if (op->get_input_size() < requiredInputNum || op->get_output_size() != 1) + IE_THROW() << logPrefix << "has incorrect number of input or output edges!"; + + if (op->get_input_size() > PER_SAMPLE_WEIGHTS_IDX) + _withWeights = true; + if (_withWeights) { + if (op->get_input_shape(PER_SAMPLE_WEIGHTS_IDX) != op->get_input_shape(INDICES_IDX)) + IE_THROW() << logPrefix << "must have equal shapes for indices and per_sample_weights inputs."; + } + + const auto& inDataDims = op->get_input_shape(EMB_TABLE_IDX); + _embDepth = 1lu; + for (size_t i = 1lu; i < inDataDims.size(); i++) { + _embDepth *= inDataDims[i]; + } +} + +template +void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsData, T* dstData, + const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc) { + std::string msgPrefix = std::string("Node EmbeddingBagSum with name '") + _layerName + "' "; + + initFromInputs(); + + const auto& inDataDims = srcDesc.getDims(); + const size_t outputBagsNum = dstDesc.getDims()[0]; + + auto threadBody = [&](const int ithr, const int nthr) { + size_t start(0lu), end(0lu); + splitter(outputBagsNum, nthr, ithr, start, end); + if (start >= end) + return; + + size_t indicesSize = 0lu; + const int* 
indices = nullptr; + int weightsIdx = 0lu; + bool withWeights = _withWeights; + + for (size_t obi = start; obi < end; obi++) { + size_t dstIndex = obi * _embDepth; + getIndices(obi, indices, indicesSize, weightsIdx, withWeights); + + if (indices != nullptr) { + withWeights = withWeights & _withWeights; + + size_t inIdx = 0lu; + if (indices[inIdx] >= inDataDims[0]) { + IE_THROW() << msgPrefix + "' has invalid embedding bag index: " + std::to_string(indices[inIdx]); + } + size_t srcIndex = indices[inIdx] * _embDepth; + + if (withWeights) { + for (size_t i = 0lu; i < _embDepth; i++) { + dstData[dstIndex + i] = srcData[srcIndex + i] * weightsData[weightsIdx]; + } + weightsIdx++; + } else { + for (size_t i = 0lu; i < _embDepth; i++) { + dstData[dstIndex + i] = srcData[srcIndex + i]; + } + } + + for (inIdx = 1lu; inIdx < indicesSize; inIdx++) { + if (indices[inIdx] >= inDataDims[0]) { + IE_THROW() << msgPrefix + "' has invalid embedding bag index: " + std::to_string(indices[inIdx]); + } + size_t srcIndex = indices[inIdx] * _embDepth; + + if (withWeights) { + for (size_t i = 0lu; i < _embDepth; i++) { + dstData[dstIndex + i] += srcData[srcIndex + i] * weightsData[weightsIdx]; + } + weightsIdx++; + } else { + for (size_t i = 0lu; i < _embDepth; i++) { + dstData[dstIndex + i] += srcData[srcIndex + i]; + } + } + } + } else { + for (size_t i = 0lu; i < _embDepth; i++) { + dstData[dstIndex + i] = 0; + } + } + } + }; + + parallel_nt(0, threadBody); +} + +void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, + const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc) { + switch (srcDesc.getPrecision()) { + case Precision::FP32: { + return processData::value_type>(reinterpret_cast(srcData), + reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + } + case Precision::I8: { + return processData::value_type>(reinterpret_cast(srcData), + reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + } + case Precision::U8: { + return processData::value_type>(srcData, weightsData, dstData, srcDesc, dstDesc); + } + case Precision::I32: { + return processData::value_type>(reinterpret_cast(srcData), + reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + } + default: { + IE_THROW() << "EmbeddingBagSum layer does not support precision '" + + std::string(srcDesc.getPrecision().name()) + "'"; + } + } +} diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h new file mode 100644 index 00000000000..f3513501b5c --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h @@ -0,0 +1,52 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNEmbeddingBagSumNode { +public: + MKLDNNEmbeddingBagSumNode( + const std::shared_ptr&, + size_t requiredInputsNum, + size_t indicesIdx, + size_t perSampleWeightsIdx, + size_t defaultIndexIdx); + + void execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, + const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + + ~MKLDNNEmbeddingBagSumNode() = default; + +protected: + virtual void initFromInputs() = 0; + virtual void getIndices( + int embIndex, + const int*& indicesRef, + size_t& size, + int& 
weightsIdx, + bool& withWeights) = 0; + + template + void processData(const T* srcData, const T* weightsData, T* dstData, + const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + + const size_t EMB_TABLE_IDX = 0lu; + const size_t INDICES_IDX; + const size_t PER_SAMPLE_WEIGHTS_IDX; + const size_t DEFAULT_INDEX_IDX; + + bool _withWeights = false; + size_t _embDepth = 0; + std::string _layerName; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp new file mode 100644 index 00000000000..798feecf7bd --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp @@ -0,0 +1,134 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include "mkldnn_embedding_segments_sum_node.h" +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto embBagSegSumOp = ngraph::as_type_ptr(op); + if (!embBagSegSumOp) { + errorMessage = "Node is not an instance of the EmbeddingSegmentsSum operation from opset v3."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNEmbeddingSegmentsSumNode::MKLDNNEmbeddingSegmentsSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), MKLDNNEmbeddingBagSumNode(op, 4lu, 1lu, 5lu, 4lu) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + std::string errPrefix = std::string("EmbeddingSegmentsSum layer with name '") + _layerName + "' "; + if (op->get_input_shape(INDICES_IDX).size() != 1) + IE_THROW() << errPrefix << "has indices data with invalid shape: " + << op->get_input_shape(INDICES_IDX).size(); + + if (op->get_input_shape(SEGMENT_ID_IDX).size() != 1) + IE_THROW() << errPrefix << "has invalid segmentID data shape: " + << op->get_input_shape(SEGMENT_ID_IDX).size(); +} + +void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + std::string logPrefix = std::string("Layer EmbeddingBagSum with name '") + _layerName + "' "; + static const std::set supportedPrecisions = + {Precision::FP32, Precision::I8, Precision::U8, Precision::I32}; + + auto inDataPrecision = getOriginalInputPrecisionAtPort(EMB_TABLE_IDX); + if (inDataPrecision == Precision::BF16) + inDataPrecision = Precision::FP32; + if (!supportedPrecisions.empty()) { + if (supportedPrecisions.find(inDataPrecision) == supportedPrecisions.end()) + IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); + } else { + static const std::set defaultSupportedPrecisions = + {Precision::FP32, Precision::I8, Precision::U8, Precision::I32}; + if (defaultSupportedPrecisions.find(inDataPrecision) == defaultSupportedPrecisions.end()) + IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); + } + + std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, + {TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}); + if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) 
+ inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) + inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + + addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); +} + +void MKLDNNEmbeddingSegmentsSumNode::initFromInputs() { + indices_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); + indicesSize_ = getParentEdgeAt(INDICES_IDX)->getBlob()->size(); + + segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->GetPtr()); + + if (getParentEdges().size() > NUM_SEGMENTS_IDX) { + numSegments_ = reinterpret_cast(getParentEdgeAt(NUM_SEGMENTS_IDX)->getMemoryPtr()->GetPtr())[0]; + } + + if (getParentEdges().size() > DEFAULT_INDEX_IDX) { + defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->GetPtr()); + } +} + +void MKLDNNEmbeddingSegmentsSumNode::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { + if (embIndex >= numSegments_) + IE_THROW() << "Invalid embedding bag index."; + + indices = nullptr; + size = 0; + withWeight = true; + + for (int si = 0; si < indicesSize_; si++) { + if (segmentIds_[si] == embIndex) { + size++; + if (indices == nullptr) { + indices = indices_ + si; + weightsIdx = si; + } + } + } + + // Empty bag + if (size == 0) { + size = 1lu; + withWeight = false; + if (defaultIndices_) + indices = defaultIndices_; + return; + } +} + +void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) { + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* weightsData = nullptr; + if (_withWeights) + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); +} + +bool MKLDNNEmbeddingSegmentsSumNode::created() const { + return getType() == EmbeddingSegmentsSum; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNEmbeddingSegmentsSumNode, EmbeddingSegmentsSum) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.h new file mode 100644 index 00000000000..54e269a391e --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.h @@ -0,0 +1,45 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "mkldnn_embedding_bag_sum_node.h" +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNEmbeddingSegmentsSumNode : public MKLDNNNode, public MKLDNNEmbeddingBagSumNode { +public: + MKLDNNEmbeddingSegmentsSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNEmbeddingSegmentsSumNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + void initFromInputs() override; + void getIndices(int embIndex, const int*& indices, 
size_t& size, int& weightsIdx, bool& withWeight) override; + + const size_t SEGMENT_ID_IDX = 2lu; + const size_t NUM_SEGMENTS_IDX = 3lu; + + int numSegments_ = 0; + + const int* indices_; + const int* segmentIds_; + const int* defaultIndices_ = nullptr; + + size_t indicesSize_ = 0; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp similarity index 81% rename from inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp rename to inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp index a48187adecc..0a5ad38507b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp @@ -2,9 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "mkldnn_quantize_node.h" +#include "mkldnn_fake_quantize_node.h" -#include #include #include #include @@ -214,7 +213,7 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ }; void generate() override { - do_dequantization = jqp_.op_type == QuantizeOpType::FakeQuantization; + do_dequantization = jqp_.op_type == FQCommon; do_rounding = do_dequantization || jqp_.dst_prc == Precision::FP32; this->preamble(); @@ -817,233 +816,235 @@ private: } }; -MKLDNNQuantizeNode::MKLDNNQuantizeNode(CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} - -void MKLDNNQuantizeNode::init() { - auto* quantizeLayer = dynamic_cast(getCnnLayer().get()); - if (quantizeLayer == nullptr) - IE_THROW() << "Cannot convert Quantize layer " << getName(); - - levels = quantizeLayer->levels; - if (levels <= 1) - IE_THROW() << "Quantize layer " << getName() << " supports only parameter levels > 1"; - - if (getParentEdges().size() != 5) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - for (size_t i = 0; i < getParentEdges().size(); i++) { - if (getParentEdgesAtPort(i).size() != 1) - IE_THROW() << "Quantize layer " << getName() << " has unsupported number of parent edges at port " << i; - } - - auto initAxisIdx = [&](size_t edgeIdx) { - auto edge = getParentEdgesAtPort(edgeIdx)[0]; - - size_t axisIdx = 0; - int numberOfNonUnit = 0; - if (edge->getDims().ndims() > 0) { - if (edge->getDims()[0] > 1) { - numberOfNonUnit++; +bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto fq = std::dynamic_pointer_cast(op); + if (!fq) { + errorMessage = "Only opset1 FakeQuantize operation is supported"; + return false; + } + if (fq->get_input_shape(0).size() < 2 || fq->get_input_shape(0).size() > 5) { + errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(fq->get_input_shape(0).size()); + return false; + } + for (size_t i = 1; i < fq->get_input_size(); i++) { + if (fq->get_input_shape(i).size() > 5) { + errorMessage = "Doesn't support 'range' input with rank: " + std::to_string(fq->get_input_shape(i).size()); + return false; } } - - for (int i = 1; i < edge->getDims().ndims(); i++) { - if (edge->getDims()[i] > 1) { - axisIdx = i; - numberOfNonUnit++; + for (size_t i = 1; i < fq->get_input_size(); i++) { + if (!std::dynamic_pointer_cast(fq->get_input_node_shared_ptr(i))) { + errorMessage = "Has non const 'range' input on " + 
std::to_string(i) + " port"; + return false; } } - if (numberOfNonUnit > 1) { - IE_THROW() << "Quantize layer " << getName() << " supports only per-tensor and per-channel quantizations"; - } + for (size_t i = 1; i < fq->get_input_size(); i++) { + size_t count_not_unit_axis = 0; + auto shape = fq->get_input_shape(i); - return axisIdx; - }; - - axis = getParentEdgesAtPort(0)[0]->getDims().ndims() == 1 ? 0 : 1; - - std::set quantizationParamsAxisesIdxs; - std::set quantizationParamsAxisesSizes; - - auto inputLowAxis = initAxisIdx(1); - isInputLowBroadcasted = getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis] == 1; - if (!isInputLowBroadcasted) { - quantizationParamsAxisesIdxs.insert(inputLowAxis); - quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis]); - } - - auto inputHighAxis = initAxisIdx(2); - isInputHighBroadcasted = getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis] == 1; - if (!isInputHighBroadcasted) { - quantizationParamsAxisesIdxs.insert(inputHighAxis); - quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis]); - } - - auto outputLowAxis = initAxisIdx(3); - isOutputLowBroadcasted = getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis] == 1; - if (!isOutputLowBroadcasted) { - quantizationParamsAxisesIdxs.insert(outputLowAxis); - quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis]); - } - - auto outputHighAxis = initAxisIdx(4); - isOutputHighBroadcasted = getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis] == 1; - if (!isOutputHighBroadcasted) { - quantizationParamsAxisesIdxs.insert(outputHighAxis); - quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis]); - } - - if (quantizationParamsAxisesIdxs.size() > 1 || quantizationParamsAxisesSizes.size() > 1) - IE_THROW() << "Unsupported input sizes for Quantize layer with name " << getName(); - - if (quantizationParamsAxisesIdxs.size() == 1) { - axis = *quantizationParamsAxisesIdxs.begin(); - } - - auto inputLowAxisSize = getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis]; - auto inputHighAxisSize = getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis]; - auto outputLowAxisSize = getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis]; - auto outputHighAxisSize = getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis]; - - size_t axisRealSize = static_cast(getParentEdgesAtPort(0)[0]->getDims()[axis]); - size_t axisPaddedSize = static_cast(rnd_up(getParentEdgesAtPort(0)[0]->getDims()[axis], 16)); - - if (quantizationParamsAxisesSizes.size() == 1) { - if (*quantizationParamsAxisesSizes.begin() != axisRealSize) - IE_THROW() << "Unsupported input sizes for Quantize layer with name " << getName(); - } - - for (size_t i = 1; i < getParentEdges().size(); i++) { - if (!getParentEdgesAtPort(i)[0]->getParent()->isConstant()) - IE_THROW() << "Quantize layer with name " << getName() << " has non const input on " << i << " port"; - auto prec = getCnnLayer()->insData[i].lock()->getPrecision(); - if (prec != Precision::FP32) - IE_THROW() << "Quantize layer with name " << getName() << " has unsupported precision " << prec << " on " << i << " port"; - } - - auto inputLowBlob = dynamic_cast*>(getParentEdgesAtPort(1)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); - auto inputLowData = inputLowBlob->buffer().as(); - - auto inputHighBlob = dynamic_cast*>(getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); - auto inputHighData = inputHighBlob->buffer().as(); - - auto outputLowBlob 
= dynamic_cast*>(getParentEdgesAtPort(3)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); - auto outputLowData = outputLowBlob->buffer().as(); - - auto outputHighBlob = dynamic_cast*>(getParentEdgesAtPort(4)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); - auto outputHighData = outputHighBlob->buffer().as(); - - bool binarization = levels == 2; - - if (binarization) { - for (int i = 0; i < outputLowAxisSize; i++) { - if (outputLowData[i] != 1.f && outputLowData[i] != 0.f) { - binarization = false; - break; - } - } - - for (int i = 0; i < outputHighAxisSize; i++) { - if (outputHighData[i] != 1.f && outputHighData[i] != 0.f) { - binarization = false; - break; - } - } - - for (ptrdiff_t i = 0; i < std::max(inputLowAxisSize, inputHighAxisSize); i++) { - if (inputLowData[isInputLowBroadcasted ? 0 : i] != inputHighData[isInputHighBroadcasted ? 0 : i]) { - binarization = false; - break; - } - } - } - - if (binarization) { - quantizeOpType = QuantizeOpType::Binarization; - - binarizationThresholds.resize(axisPaddedSize); - binarizationOutputMask.resize(axisPaddedSize); - - for (int i = 0; i < axisRealSize; i++) { - binarizationThresholds[i] = inputLowData[isInputLowBroadcasted ? 0 : i]; - binarizationOutputMask[i] = outputHighData[isOutputHighBroadcasted ? 0 : i] == 1.f ? 0xffffffff : 0x00000000; - } - } else { - auto allElementsAreEqual = [&](const float* data, size_t size) { - if (size == 0) - return true; - - auto first = data[0]; - for (int i = 1; i < size; i++) { - if (data[i] != first) + if (ngraph::shape_size(shape) != 1) { + size_t not_unit_axis = 0; + for (size_t i = 0; i < shape.size(); i++) { + if (shape[i] > 1) { + not_unit_axis = i; + count_not_unit_axis++; + } + } + if (count_not_unit_axis > 1 || not_unit_axis > 1) { + errorMessage = "Supports only per-tensor and per-channel quantizations"; return false; + } + } + } + if (fq->get_auto_broadcast().m_type != ngraph::op::AutoBroadcastType::NONE && + fq->get_auto_broadcast().m_type != ngraph::op::AutoBroadcastType::NUMPY) { + errorMessage = "Doesn't support broadcast type: " + ngraph::as_string(fq->get_auto_broadcast().m_type); + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + algorithm = FQCommon; + const auto fq = std::dynamic_pointer_cast(op); + + errorPrefix = "FakeQuantize node with name '" + getName() + "' "; + levels = fq->get_levels(); + if (levels <= 1) + IE_THROW() << errorPrefix << "supports 'levels' attribute greater than or equal to 2"; + + if (fq->get_input_size() != 5) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << fq->get_input_size(); + if (fq->get_output_size() != 1) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << fq->get_output_size(); + + auto initAxisIdx = [&](size_t edgeIdx) { + const auto &inputDims = fq->get_input_shape(edgeIdx); + + size_t axisIdx = 0; + for (int i = 1; i < inputDims.size(); i++) { + if (inputDims[i] > 1) { + axisIdx = i; + } } - return true; + return axisIdx; }; - if (allElementsAreEqual(inputLowData, inputLowAxisSize)) { - inputLowAxisSize = 1; - isInputLowBroadcasted = true; + axis = fq->get_input_shape(0).size() == 1 ? 
0 : 1; + int axisSize = -1; + + auto inputLowAxis = initAxisIdx(1); + const auto ilShape = fq->get_input_shape(1); + isInputLowBroadcasted = (ngraph::is_scalar(ilShape) || ilShape[inputLowAxis] == 1); + if (!isInputLowBroadcasted) { + axis = inputLowAxis; + axisSize = ilShape[inputLowAxis]; } - if (allElementsAreEqual(inputHighData, inputHighAxisSize)) { - inputHighAxisSize = 1; - isInputHighBroadcasted = true; + auto inputHighAxis = initAxisIdx(2); + const auto ihShape = fq->get_input_shape(2); + isInputHighBroadcasted = (ngraph::is_scalar(ihShape) || ihShape[inputHighAxis] == 1); + if (!isInputHighBroadcasted) { + axis = inputHighAxis; + axisSize = ihShape[inputHighAxis]; } - if (allElementsAreEqual(outputLowData, outputLowAxisSize)) { - outputLowAxisSize = 1; - isOutputLowBroadcasted = true; + auto outputLowAxis = initAxisIdx(3); + const auto olShape = fq->get_input_shape(3); + isOutputLowBroadcasted = (ngraph::is_scalar(olShape) || olShape[outputLowAxis] == 1); + if (!isOutputLowBroadcasted) { + axis = outputLowAxis; + axisSize = olShape[outputLowAxis]; } - if (allElementsAreEqual(outputHighData, outputHighAxisSize)) { - outputHighAxisSize = 1; - isOutputHighBroadcasted = true; + auto outputHighAxis = initAxisIdx(4); + const auto ohShape = fq->get_input_shape(4); + isOutputHighBroadcasted = (ngraph::is_scalar(ohShape) || ohShape[outputHighAxis] == 1); + if (!isOutputHighBroadcasted) { + axis = outputHighAxis; + axisSize = ohShape[outputHighAxis]; } - cropLow.resize(inputLowAxisSize); - cropHigh.resize(inputHighAxisSize); - inputScale.resize(std::max(inputLowAxisSize, inputHighAxisSize)); - inputShift.resize(std::max(inputLowAxisSize, inputHighAxisSize)); - outputScale.resize(std::max(outputLowAxisSize, outputHighAxisSize)); - outputShift.resize(outputLowAxisSize); + auto inputLowAxisSize = ngraph::is_scalar(ilShape) ? 1 : ilShape[inputLowAxis]; + auto inputHighAxisSize = ngraph::is_scalar(ihShape) ? 1 : ihShape[inputHighAxis]; + auto outputLowAxisSize = ngraph::is_scalar(olShape) ? 1 : olShape[outputLowAxis]; + auto outputHighAxisSize = ngraph::is_scalar(ohShape) ? 1 : ohShape[outputHighAxis]; - bool quantizationOnly = true; + int axisRealSize = static_cast(fq->get_input_shape(0)[axis]); + size_t axisPaddedSize = static_cast(rnd_up(fq->get_input_shape(0)[axis], 16)); - // WA: Depending on the hardware, the compiler may enable data movement optimizations that require working with aligned data. - // If we receive unaligned data, then working with them can lead to segfault. To avoid this situation, we added a function - // that pushes data into aligned memory. 
- auto prepareAlignedData = [](float *newData, const float *oldData, int dataSize) { - memcpy(newData, oldData, dataSize * sizeof(float)); - }; + if (axisSize != -1 && axisSize != axisRealSize) + IE_THROW() << errorPrefix << "has different quantization axis size on 'data' and 'range' inputs"; - std::vector inputLowDataAligned(inputLowAxisSize); - prepareAlignedData(&inputLowDataAligned[0], inputLowData, inputLowDataAligned.size()); + const auto inputLowNode = std::dynamic_pointer_cast(fq->get_input_node_shared_ptr(1)); + auto inputLowData = inputLowNode->cast_vector(); - std::vector inputHighDataAligned(inputHighAxisSize); - prepareAlignedData(&inputHighDataAligned[0], inputHighData, inputHighDataAligned.size()); + const auto inputHighNode = std::dynamic_pointer_cast(fq->get_input_node_shared_ptr(2)); + auto inputHighData = inputHighNode->cast_vector(); - std::vector outputLowDataAligned(outputLowAxisSize); - prepareAlignedData(&outputLowDataAligned[0], outputLowData, outputLowDataAligned.size()); + const auto outputLowNode = std::dynamic_pointer_cast(fq->get_input_node_shared_ptr(3)); + auto outputLowData = outputLowNode->cast_vector(); - std::vector outputHighDataAligned(outputHighAxisSize); - prepareAlignedData(&outputHighDataAligned[0], outputHighData, outputHighDataAligned.size()); + const auto outputHighNode = std::dynamic_pointer_cast(fq->get_input_node_shared_ptr(4)); + auto outputHighData = outputHighNode->cast_vector(); - for (int i = 0; i < cropLow.size(); i++) { - cropLow[i] = inputLowDataAligned[isInputLowBroadcasted ? 0 : i]; + binarization = levels == 2; + + if (binarization) { + for (int i = 0; i < outputLowAxisSize; i++) { + if (outputLowData[i] != 1.f && outputLowData[i] != 0.f) { + binarization = false; + break; + } + } + + for (int i = 0; i < outputHighAxisSize; i++) { + if (outputHighData[i] != 1.f && outputHighData[i] != 0.f) { + binarization = false; + break; + } + } + + for (ptrdiff_t i = 0; i < std::max(inputLowAxisSize, inputHighAxisSize); i++) { + if (inputLowData[isInputLowBroadcasted ? 0 : i] != inputHighData[isInputHighBroadcasted ? 0 : i]) { + binarization = false; + break; + } + } } - for (int i = 0; i < cropHigh.size(); i++) { - cropHigh[i] = inputHighDataAligned[isInputHighBroadcasted ? 0 : i]; - } + if (binarization) { + algorithm = FQBinarization; - for (int i = 0; i < inputScale.size(); i++) { - float il = inputLowDataAligned[isInputLowBroadcasted ? 0 : i]; - float ih = inputHighDataAligned[isInputHighBroadcasted ? 0 : i]; + binarizationThresholds.resize(axisPaddedSize); + binarizationOutputMask.resize(axisPaddedSize); + + for (int i = 0; i < axisRealSize; i++) { + binarizationThresholds[i] = inputLowData[isInputLowBroadcasted ? 0 : i]; + binarizationOutputMask[i] = outputHighData[isOutputHighBroadcasted ? 0 : i] == 1.f ? 
0xffffffff : 0x00000000; + } + } else { + auto allElementsAreEqual = [&](const std::vector &data, size_t size) { + if (size == 0) + return true; + + auto first = data[0]; + for (int i = 1; i < size; i++) { + if (data[i] != first) + return false; + } + + return true; + }; + + if (allElementsAreEqual(inputLowData, inputLowAxisSize)) { + inputLowAxisSize = 1; + isInputLowBroadcasted = true; + } + + if (allElementsAreEqual(inputHighData, inputHighAxisSize)) { + inputHighAxisSize = 1; + isInputHighBroadcasted = true; + } + + if (allElementsAreEqual(outputLowData, outputLowAxisSize)) { + outputLowAxisSize = 1; + isOutputLowBroadcasted = true; + } + + if (allElementsAreEqual(outputHighData, outputHighAxisSize)) { + outputHighAxisSize = 1; + isOutputHighBroadcasted = true; + } + + cropLow.resize(inputLowAxisSize); + cropHigh.resize(inputHighAxisSize); + inputScale.resize(std::max(inputLowAxisSize, inputHighAxisSize)); + inputShift.resize(std::max(inputLowAxisSize, inputHighAxisSize)); + outputScale.resize(std::max(outputLowAxisSize, outputHighAxisSize)); + outputShift.resize(outputLowAxisSize); + + bool quantizationOnly = true; + + for (int i = 0; i < cropLow.size(); i++) { + cropLow[i] = inputLowData[isInputLowBroadcasted ? 0 : i]; + } + + for (int i = 0; i < cropHigh.size(); i++) { + cropHigh[i] = inputHighData[isInputHighBroadcasted ? 0 : i]; + } + + for (int i = 0; i < inputScale.size(); i++) { + float il = inputLowData[isInputLowBroadcasted ? 0 : i]; + float ih = inputHighData[isInputHighBroadcasted ? 0 : i]; #if defined(VALIDATE_QUANTIZATION_RANGES) if ((il == ih && levels != 2) || il > ih || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) { @@ -1052,56 +1053,44 @@ void MKLDNNQuantizeNode::init() { } #endif - inputScale[i] = (levels - 1) / (ih - il); - inputShift[i] = -il * (levels - 1) / (ih - il); - } + inputScale[i] = (levels - 1) / (ih - il); + inputShift[i] = -il * (levels - 1) / (ih - il); + } - for (int i = 0; i < outputScale.size(); i++) { - float ol = outputLowDataAligned[isOutputLowBroadcasted ? 0 : i]; - float oh = outputHighDataAligned[isOutputHighBroadcasted ? 0 : i]; + for (int i = 0; i < outputScale.size(); i++) { + float ol = outputLowData[isOutputLowBroadcasted ? 0 : i]; + float oh = outputHighData[isOutputHighBroadcasted ? 0 : i]; #if defined(VALIDATE_QUANTIZATION_RANGES) - if (std::isnan(ol) || std::isnan(oh) || std::isinf(ol) || std::isinf(oh)) { - IE_THROW() << "Quantize layer with name '" << getName() << "' has wrong output quantize ranges: " - << "outputLow = " << ol << ", outputHigh = " << oh; - } + if (std::isnan(ol) || std::isnan(oh) || std::isinf(ol) || std::isinf(oh)) { + IE_THROW() << "Quantize layer with name '" << getName() << "' has wrong output quantize ranges: " + << "outputLow = " << ol << ", outputHigh = " << oh; + } #endif - outputScale[i] = (oh - ol) / (levels - 1); + outputScale[i] = (oh - ol) / (levels - 1); - if (outputScale[i] != 1.f) - quantizationOnly = false; + if (outputScale[i] != 1.f) + quantizationOnly = false; + } + + for (int i = 0; i < outputShift.size(); i++) { + float ol = outputLowData[isOutputLowBroadcasted ? 0 : i]; + + outputShift[i] = ol; + + if (outputShift[i] != 0.f) + quantizationOnly = false; + } + + algorithm = quantizationOnly ? FQQuantization : FQCommon; } - - for (int i = 0; i < outputShift.size(); i++) { - float ol = outputLowDataAligned[isOutputLowBroadcasted ? 0 : i]; - - outputShift[i] = ol; - - if (outputShift[i] != 0.f) - quantizationOnly = false; - } - - quantizeOpType = quantizationOnly ? 
QuantizeOpType::Quantization : QuantizeOpType::FakeQuantization; - } - - if (binarization) { - inputPrecision = Precision::FP32; - outputPrecision = Precision::BIN; } else { - inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); - outputPrecision = getCnnLayer()->outData[0]->getPrecision(); - - if (inputPrecision != Precision::FP32 && inputPrecision != Precision::U8 && inputPrecision != Precision::I8) - inputPrecision = Precision::FP32; - - if (outputPrecision != Precision::FP32 && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) - outputPrecision = Precision::FP32; + IE_THROW(NotImplemented) << errorMessage; } } - -std::vector MKLDNNQuantizeNode::getDataFormats() const { +std::vector MKLDNNFakeQuantizeNode::getDataFormats() const { // Special case for first FQ in the network if (getParentEdgesAtPort(0)[0]->getDims()[getAxis()] == 3) { return { MKLDNNMemory::GetPlainFormat(getParentEdgesAtPort(0)[0]->getDims()) }; @@ -1131,17 +1120,37 @@ std::vector MKLDNNQuantizeNode::getDataFormats() con } } -void MKLDNNQuantizeNode::getSupportedDescriptors() { - std::string errorPrefix = "Quantize layer with name '" + getName() + "' "; +void MKLDNNFakeQuantizeNode::init() { + if (binarization) { + inputPrecision = Precision::FP32; + outputPrecision = Precision::BIN; + } else { + inputPrecision = getOriginalInputPrecisionAtPort(0); + outputPrecision = getOriginalOutputPrecisionAtPort(0); + + if (inputPrecision != Precision::FP32 && inputPrecision != Precision::U8 && inputPrecision != Precision::I8) + inputPrecision = Precision::FP32; + + if (outputPrecision != Precision::FP32 && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) + outputPrecision = Precision::FP32; + } +} + +void MKLDNNFakeQuantizeNode::getSupportedDescriptors() { + if (getParentEdges().size() != 5) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size(); + if (getChildEdges().empty()) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); + + for (size_t i = 0; i < getParentEdges().size(); i++) { + if (getParentEdgesAtPort(i).size() != 1) + IE_THROW() << errorPrefix << "has unsupported number of parent edges at port " << i; + } if (getParentEdgesAtPort(0)[0]->getDims().ndims() != getChildEdgesAtPort(0)[0]->getDims().ndims()) { IE_THROW() << errorPrefix << "has different ranks for input and output tensors"; } - if (getParentEdgesAtPort(0)[0]->getDims().ndims() < 1ul || getParentEdgesAtPort(0)[0]->getDims().ndims() > 5ul) { - IE_THROW() << errorPrefix << "has unsupported number of dimensions for input at edge 0"; - } - if (isBinarization()) { if (getParentEdgesAtPort(0)[0]->getDims().ndims() != 4ul) { IE_THROW() << errorPrefix << "doesn't support input/output rank != 4"; @@ -1156,7 +1165,7 @@ void MKLDNNQuantizeNode::getSupportedDescriptors() { } } -void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() { +void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -1210,7 +1219,7 @@ void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNQuantizeNode::createPrimitive() { +void MKLDNNFakeQuantizeNode::createPrimitive() { auto config = getSelectedPrimitiveDescriptor()->getConfig(); auto inDims = config.inConfs[0].desc.getDims(); @@ -1222,7 +1231,7 @@ void MKLDNNQuantizeNode::createPrimitive() { jqp.src_layout = config.inConfs[0].desc.getLayout(); - jqp.op_type = quantizeOpType; + jqp.op_type = getAlgorithm(); 
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) @@ -1282,7 +1291,7 @@ void MKLDNNQuantizeNode::createPrimitive() { } } -void MKLDNNQuantizeNode::executeReference() { +void MKLDNNFakeQuantizeNode::executeReference() { auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); @@ -1301,7 +1310,7 @@ void MKLDNNQuantizeNode::executeReference() { const int H = srcDims.size() == 3 ? srcDims[2] : srcDims.size() > 3 ? srcDims[srcDims.size() - 2] : 1; const int W = srcDims.size() > 3 ? srcDims[srcDims.size() - 1] : 1; - if (jqp.op_type == QuantizeOpType::Binarization) { + if (jqp.op_type == FQBinarization) { size_t tmp = s_str[s_str.size() - 1]; for (int i = s_str.size() - 1; i > 1; i--) { s_str[i] = s_str[i - 1]; @@ -1400,7 +1409,7 @@ void MKLDNNQuantizeNode::executeReference() { } } -void MKLDNNQuantizeNode::executeBinarization() { +void MKLDNNFakeQuantizeNode::executeBinarization() { auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); @@ -1440,7 +1449,7 @@ void MKLDNNQuantizeNode::executeBinarization() { }); } -void MKLDNNQuantizeNode::executeQuantization() { +void MKLDNNFakeQuantizeNode::executeQuantization() { auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); @@ -1541,13 +1550,13 @@ void MKLDNNQuantizeNode::executeQuantization() { } } -void MKLDNNQuantizeNode::execute(mkldnn::stream strm) { +void MKLDNNFakeQuantizeNode::execute(mkldnn::stream strm) { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors."; if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) { - if (jqp.op_type == QuantizeOpType::Binarization) + if (jqp.op_type == FQBinarization) executeBinarization(); else executeQuantization(); @@ -1556,13 +1565,13 @@ void MKLDNNQuantizeNode::execute(mkldnn::stream strm) { } } -void MKLDNNQuantizeNode::appendPostOps(mkldnn::post_ops& ops) { +void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops) { // MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16 // by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations. // Otherwise it can lead to buffer over-read and performance penalties due to denormals. const size_t bufferAlignment = 16; - if (quantizeOpType == QuantizeOpType::Binarization) { + if (getAlgorithm() == FQBinarization) { if (!isPostOpDataInitialized) { size_t paddedSize = rnd_up(binarizationThresholds.size(), bufferAlignment); binarizationThresholds.resize(paddedSize, 0); @@ -1593,8 +1602,8 @@ void MKLDNNQuantizeNode::appendPostOps(mkldnn::post_ops& ops) { outputShiftData.set(outputShift.size(), 1 << 1, &outputShift[0]); } - mkldnn::algorithm alg = quantizeOpType == QuantizeOpType::FakeQuantization ? mkldnn::algorithm::quantization_quantize_dequantize : - mkldnn::algorithm::quantization_quantize; + mkldnn::algorithm alg = getAlgorithm() == FQCommon ? 
mkldnn::algorithm::quantization_quantize_dequantize : + mkldnn::algorithm::quantization_quantize; ops.append_quantization(alg, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData); } @@ -1603,35 +1612,8 @@ void MKLDNNQuantizeNode::appendPostOps(mkldnn::post_ops& ops) { isPostOpDataInitialized = true; } -bool MKLDNNQuantizeNode::isNeedToDecompose(const std::shared_ptr& node) { - if (const auto fq = std::dynamic_pointer_cast(node)) { - for (size_t i = 0; i < fq->get_input_size(); i++) { - if (fq->get_input_shape(i).size() > 5) - return true; - } - - for (size_t i = 1; i < fq->get_input_size(); i++) { - size_t count_not_unit_axis = 0; - auto shape = fq->get_input_shape(i); - - if (ngraph::shape_size(shape) != 1) { - size_t not_unit_axis = 0; - for (size_t i = 0; i < shape.size(); i++) { - if (shape[i] > 1) { - not_unit_axis = i; - count_not_unit_axis++; - } - } - if (count_not_unit_axis > 1 || not_unit_axis > 1) - return true; - } - } - } - return false; +bool MKLDNNFakeQuantizeNode::created() const { + return getType() == FakeQuantize; } -bool MKLDNNQuantizeNode::created() const { - return getType() == Quantize; -} - -REG_MKLDNN_PRIM_FOR(MKLDNNQuantizeNode, Quantize); +REG_MKLDNN_PRIM_FOR(MKLDNNFakeQuantizeNode, FakeQuantize); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h similarity index 89% rename from inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h rename to inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h index 84c9884a82c..99ac82a50e6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h @@ -15,12 +15,6 @@ namespace MKLDNNPlugin { -enum QuantizeOpType { - FakeQuantization, - Quantization, - Binarization, -}; - struct jit_quantize_params { int c; @@ -30,7 +24,7 @@ struct jit_quantize_params { InferenceEngine::Layout src_layout; - QuantizeOpType op_type; + Algorithm op_type; }; struct jit_quantize_call_args { @@ -68,10 +62,10 @@ struct jit_uni_quantize_kernel { jit_quantize_params jqp_; }; -class MKLDNNQuantizeNode : public MKLDNNNode { +class MKLDNNFakeQuantizeNode : public MKLDNNNode { public: - MKLDNNQuantizeNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNQuantizeNode() override = default; + MKLDNNFakeQuantizeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNFakeQuantizeNode() override = default; void initSupportedPrimitiveDescriptors() override; void getSupportedDescriptors() override; @@ -81,8 +75,7 @@ public: size_t getAxis() const { return axis; } - bool isBinarization() const { return quantizeOpType == QuantizeOpType::Binarization; } - QuantizeOpType getOpType() const { return quantizeOpType; } + bool isBinarization() const { return getAlgorithm() == Algorithm::FQBinarization; } const float* getBinarizationTresholdsPtr() const { return &binarizationThresholds[0]; } const float* getBinarizationOutputMaskPtr() const { return reinterpret_cast(&binarizationOutputMask[0]); } @@ -113,7 +106,7 @@ public: void appendPostOps(mkldnn::post_ops& ops) override; - static bool isNeedToDecompose(const std::shared_ptr& node); + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: void init() override; @@ -124,6 +117,8 @@ private: size_t levels = 0; + 
bool binarization = false; + std::vector binarizationThresholds; std::vector binarizationOutputMask; @@ -153,11 +148,11 @@ private: InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::FP32; InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32; - QuantizeOpType quantizeOpType = FakeQuantization; - jit_quantize_params jqp = {}; std::shared_ptr quantize_kernel = nullptr; + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index 46e01398166..e5b9ade8567 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -4,9 +4,9 @@ #include "mkldnn_fullyconnected_node.h" #include "mkldnn_eltwise_node.h" -#include "mkldnn_quantize_node.h" - -#include +#include "mkldnn_fake_quantize_node.h" +#include "ngraph_transformations/op/fully_connected.hpp" +#include #include #include #include @@ -17,19 +17,36 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache), withBiases(false), baseInputsNumber(0) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); - }); - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - if (internalBlobs.size() <= 1) - return MKLDNNMemoryDesc(); - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1)); - }); +bool MKLDNNFullyConnectedNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto fc = std::dynamic_pointer_cast(op); + if (!fc) { + errorMessage = "Only legacy FullyConnected operation is supported"; + return false; + } + if (fc->get_input_size() == 3 && std::dynamic_pointer_cast(fc->get_input_node_shared_ptr(BIAS_ID)) == nullptr) { + errorMessage = "Only Constant operation on 'bias' input is supported"; + return false; + } + if (!one_of(fc->get_input_shape(DATA_ID).size(), 2, 3, 4)) { + errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(fc->get_input_shape(DATA_ID).size()); + return false; + } + } catch (...) 
{ + return false; + } + return true; +} - if (getCnnLayer()->type == "FullyConnected" || getCnnLayer()->type == "InnerProduct") { - baseInputsNumber = getCnnLayer().get()->insData.size(); +MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache), withBiases(false) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "FullyConnected node with name '" + getName() + "'"; + + withBiases = op->get_input_size() == 3; + } else { + IE_THROW(NotImplemented) << errorMessage; } } @@ -50,33 +67,27 @@ std::vector MKLDNNFullyConnectedNode::getAvailableFormatsFor } void MKLDNNFullyConnectedNode::getSupportedDescriptors() { - if (!descs.empty()) - return; + if (getParentEdges().size() != 2 && getParentEdges().size() != 3) + IE_THROW() << errorPrefix << " has incorrect number of input edges"; + if (getChildEdges().empty()) + IE_THROW()<< errorPrefix << " has incorrect number of output edges"; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - precision = getCnnLayer()->outData[0]->getPrecision(); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(DATA_ID)); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(DATA_ID)); if (inputDataType == memory::data_type::f32) { outputDataType = memory::data_type::f32; } - if (baseInputsNumber > 1) { - if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(lastFusedLayer->outData[0]->getPrecision()); - } - } - auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->insData[1].lock()->getPrecision()); + if (!fusedWith.empty()) { + outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); + } + auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(WEIGHTS_ID)); - // We have to extend gemm_x8s8s32x_inner_product_fwd_t from oneDNN to support BF16 output data type - if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8) && - inputDataType != memory::data_type::bf16) { - inputDataType = memory::data_type::f32; - outputDataType = memory::data_type::f32; - } + // We have to extend gemm_x8s8s32x_inner_product_fwd_t from oneDNN to support BF16 output data type + if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8) + && inputDataType != memory::data_type::bf16) { + inputDataType = outputDataType = memory::data_type::f32; } if (one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) @@ -89,29 +100,9 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { outputDataType = memory::data_type::bf16; } - auto * fcLayer = dynamic_cast(getCnnLayer().get()); - if (fcLayer == nullptr) - IE_THROW() << "Cannot convert fully connected layer."; - if (fcLayer->_weights == nullptr && baseInputsNumber == 1) { - IE_THROW() << "Weights are empty for layer: " << fcLayer->name - << " used in MKLDNN node: " << getName() << "\n" - << 
"Use the second argumemt of InferenceEngine::Core::ReadNetwork" - << " to load them from .bin part of the IR"; - } - - if (getParentEdges().size() != baseInputsNumber) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - MKLDNNDims inDims = getParentEdgeAt(0)->getDims(); MKLDNNDims outDims = getChildEdgeAt(0)->getDims(); - if (!one_of(inDims.ndims(), 2, 3, 4, 5)) { - IE_THROW() << "Unsupported source format for FC layer. Expected 5, 4, 3 or 2, got: " - << inDims.ndims() << " dims."; - } - if (inDims.ndims() == 3) { weightsDims = InferenceEngine::SizeVector({static_cast(outDims[2]), static_cast(inDims[2])}); } else { @@ -121,16 +112,6 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { } biasesDims.push_back(weightsDims[0]); - if (baseInputsNumber == 1) { - internalBlobs.push_back(createInternalBlob(weightsDims, true)); - } - - withBiases = (fcLayer->_biases != nullptr && fcLayer->_biases->size() != 0) || baseInputsNumber == 3; - - if (withBiases && baseInputsNumber == 1) { - internalBlobs.push_back(createInternalBlob(biasesDims, false)); - } - for (auto format : getAvailableFormatsForDims(inDims)) { MKLDNNMemoryDesc in_candidate(inDims, inputDataType, format); MKLDNNMemoryDesc out_candidate(outDims, outputDataType, memory::format_tag::any); @@ -153,9 +134,10 @@ void MKLDNNFullyConnectedNode::createPrimitive() { auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); if (withBiases) - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_BIAS, getBias()}, {DNNL_ARG_DST, dst}}; + primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getParentEdgeAt(WEIGHTS_ID)->getMemory().GetPrimitive()}, + {DNNL_ARG_BIAS, getParentEdgeAt(BIAS_ID)->getMemory().GetPrimitive()}, {DNNL_ARG_DST, dst}}; else - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_DST, dst}}; + primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getParentEdgeAt(WEIGHTS_ID)->getMemory().GetPrimitive()}, {DNNL_ARG_DST, dst}}; } void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { @@ -181,83 +163,27 @@ void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { } } +bool MKLDNNFullyConnectedNode::canFuse(const MKLDNNNodePtr& node) const { + return canFuseSimpleOperation(node); +} + void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) { - int blob_idx = 0; mkldnn::post_ops ops; for (auto &node : fusedWith) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode) { - quantizeNode->appendPostOps(ops); + auto* fakeQuantizeNode = dynamic_cast(node.get()); + if (fakeQuantizeNode) { + fakeQuantizeNode->appendPostOps(ops); continue; } auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode && (eltwiseNode->getOpType() == MulAdd || eltwiseNode->getOpType() == Prelu)) { - if (initWeights) { - auto* depthwiseLayer = reinterpret_cast(eltwiseNode->getCnnLayer().get()); - int ndims = getParentEdgeAt(0)->getDims().ndims(); - MKLDNNDims depthwiseDims({static_cast(rnd_up(ndims == 3 ? 
getChildEdgeAt(0)->getDims()[2] : getChildEdgeAt(0)->getDims()[1], 16))}); - - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format_tag::x); - PostOpsIntBlobMemory[blob_idx]->FillZero(); - - // In case ndims == 3 graph optimizer allows fusing only if all weights values are the same - if (depthwiseLayer->blobs["weights"]->size() == 1 || ndims == 3) { - float broadcastValue = static_cast(depthwiseLayer->_weights->buffer())[0]; - for (int i = 0; i < PostOpsIntBlobMemory[blob_idx]->GetDesc().getDims()[0]; i++) { - static_cast(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue; - } - } else { - PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::format_tag::x, - depthwiseLayer->_weights->buffer(), - depthwiseLayer->_weights->size() * - MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); - } - - if (eltwiseNode->getAlgorithm() == algorithm::depthwise_scale_shift) { - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format_tag::x); - PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); - - // In case ndims == 3 graph optimizer allows fusing only if all biases values are the same - if (depthwiseLayer->blobs["biases"]->size() == 1 || ndims == 3) { - float broadcastValue = static_cast(depthwiseLayer->_biases->buffer())[0]; - for (int i = 0; i < PostOpsIntBlobMemory[blob_idx + 1]->GetDesc().getDims()[0]; i++) { - static_cast(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue; - } - } else { - PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::format_tag::x, - depthwiseLayer->_biases->buffer(), - depthwiseLayer->_biases->size() * - MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); - } - - ops.append_depthwise(eltwiseNode->getAlgorithm(), - (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(), - (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData()); - - blob_idx += 2; - } else { - ops.append_depthwise(eltwiseNode->getAlgorithm(), - (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(), - nullptr); - - blob_idx += 1; - } - } else { - ops.append_depthwise(eltwiseNode->getAlgorithm(), - nullptr, - nullptr); - } - + if (eltwiseNode) { + eltwiseNode->appendPostOps(ops); continue; } - if (eltwiseNode) { - eltwiseNode->appendPostOps(ops); - } + IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented"; } attr.set_post_ops(ops); @@ -320,7 +246,8 @@ void MKLDNNFullyConnectedNode::createDescriptor(const std::vectorinsData[2].lock()->getPrecision()) : memory::data_type::f32; + if (withBiases) + bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(BIAS_ID)); } if (inDesc.getDims().size() == 3) { @@ -337,7 +264,7 @@ void MKLDNNFullyConnectedNode::createDescriptor(const std::vector( new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, bias_candidate, out_candidate))); @@ -386,14 +313,6 @@ MKLDNNMemoryDesc MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_ } } -const mkldnn::memory& MKLDNNFullyConnectedNode::getWeights() const { - return baseInputsNumber > 1 ? 
getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive(); -} - -const mkldnn::memory& MKLDNNFullyConnectedNode::getBias() const { - return baseInputsNumber > 2 ? getParentEdgeAt(2)->getMemory().GetPrimitive() : internalBlobMemory[1]->GetPrimitive(); -} - InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const { std::vector inputPrecisions; // Don't take bias precision into account diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h index 7afcd016057..4b5622a8cda 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNFullyConnectedNode : public MKLDNNNode { public: - MKLDNNFullyConnectedNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNFullyConnectedNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNFullyConnectedNode() override = default; std::vector getAvailableFormatsForDims(const MKLDNNDims &dims) const override; @@ -32,17 +32,18 @@ public: const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { - return static_cast(baseInputsNumber); + return static_cast(getOriginalInputsNumber()); } MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - const mkldnn::memory& getWeights() const; - const mkldnn::memory& getBias() const; - InferenceEngine::Precision getRuntimePrecision() const override; + bool canFuse(const MKLDNNNodePtr& node) const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + protected: std::shared_ptr initPrimitiveAttr(); @@ -53,9 +54,12 @@ private: std::vector PostOpsIntBlobMemory; void setPostOps(mkldnn::primitive_attr &attr, bool initWeights); - bool withBiases; - int baseInputsNumber; + bool withBiases = false; + + std::string errorPrefix; + static const size_t DATA_ID = 0; + static const size_t WEIGHTS_ID = 1; + static const size_t BIAS_ID = 2; }; } // namespace MKLDNNPlugin - diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp new file mode 100644 index 00000000000..eabd4f52aac --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp @@ -0,0 +1,145 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "mkldnn_gather_elements_node.h" +#include +#include +#include +#include "common/cpu_memcpy.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNGatherElementsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto gatherElementsOp = ngraph::as_type_ptr(op); + if (!gatherElementsOp) { + errorMessage = "Node is not an instance of the GatherElements operation from operation set v6."; + return false; + } + } catch (...) 
{ + return false; + } + + return true; +} + +MKLDNNGatherElementsNode::MKLDNNGatherElementsNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + errorPrefix_ = std::string("Layer GatherElements with name '") + op->get_friendly_name() + "'"; + + if (op->get_input_size() != 2 || op->get_output_size() != 1) + IE_THROW() << errorPrefix_ << " has invalid number of input/output edges."; + + const auto& dataDims = op->get_input_shape(dataIndex_); + const auto& indicesDims = op->get_input_shape(indicesIndex_); + if (dataDims.size() != indicesDims.size()) + IE_THROW() << errorPrefix_ << " has invalid input shapes. Inputs 'Data' and 'Indices' must have equal ranks."; + + auto gatherElementsOp = ngraph::as_type_ptr(op); + auto axis = gatherElementsOp->get_axis(); + if (axis < 0) + axis += dataDims.size(); + if (axis < 0 || axis >= static_cast(dataDims.size())) + IE_THROW() << errorPrefix_ << " has invalid axis attribute: " << axis; + axis_ = axis; + + auto outputShape = op->get_output_shape(0); + strideAxDst_ = 1; + for (int i = outputShape.size() - 1; i > axis_; i--) + strideAxDst_ *= outputShape[i]; + dstAxDim_ = op->get_output_shape(0)[axis_]; + if (axis_ > 0) { + strideAx1Diff_ = 1; + for (int i = dataDims.size() - 1; i >= axis_; i--) + strideAx1Diff_ *= dataDims[i]; + strideAx1Diff_ -= strideAxDst_ * outputShape[axis_]; + } +} + +void MKLDNNGatherElementsNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + Precision inDataPrecision = getOriginalInputPrecisionAtPort(dataIndex_); + if (!MKLDNNPlugin::one_of(inDataPrecision.size(), + sizeof(PrecisionTrait::value_type), + sizeof(PrecisionTrait::value_type), + sizeof(PrecisionTrait::value_type))) { + IE_THROW() << errorPrefix_ << " has unsupported 'inputData' input precision: " << inDataPrecision; + } + + Precision indicesPrecision = getOriginalInputPrecisionAtPort(indicesIndex_); + if (!MKLDNNPlugin::one_of(indicesPrecision, Precision::I32, Precision::I64)) { + IE_THROW() << errorPrefix_ << " has unsupported 'indices' input precision: " << indicesPrecision; + } + + dataTypeSize_ = inDataPrecision.size(); + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, inDataPrecision}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, + impl_desc_type::ref_any); +} + +template +void MKLDNNGatherElementsNode::directExecution() { + const auto *srcData = reinterpret_cast(getParentEdgeAt(dataIndex_)->getMemoryPtr()->GetPtr()); + const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + const int outSize = getChildEdgeAt(0)->getBlob()->size(); + auto threadBody = [&](const int ithr, const int nthr) { + int start(0lu), end(0lu); + splitter(outSize, nthr, ithr, start, end); + if (start >= end) + return; + + int axStrideIt = start % strideAxDst_; + int dstAxIdx = (start / strideAxDst_) % dstAxDim_; + int dstShift0 = (start / strideAxDst_ / dstAxDim_) * strideAx1Diff_; + + for (size_t o = start; o < end; o++, axStrideIt++) { + if (axStrideIt == strideAxDst_) { + axStrideIt = 0; + dstAxIdx++; + if (dstAxIdx == dstAxDim_) { + dstAxIdx = 0; + dstShift0 += strideAx1Diff_; + } + } + dstData[o] = srcData[o + dstShift0 + (indices[o] - dstAxIdx) * 
strideAxDst_]; + } + }; + + parallel_nt(0, threadBody); +} + +void MKLDNNGatherElementsNode::execute(mkldnn::stream strm) { + switch (dataTypeSize_) { + case sizeof(PrecisionTrait::value_type): + return directExecution::value_type>(); + case sizeof(PrecisionTrait::value_type): + return directExecution::value_type>(); + case sizeof(PrecisionTrait::value_type): + return directExecution::value_type>(); + default: + return IE_THROW() << "Unsupported data type size"; + } +} + +bool MKLDNNGatherElementsNode::created() const { + return getType() == GatherElements; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNGatherElementsNode, GatherElements) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h new file mode 100644 index 00000000000..51c49473c13 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNGatherElementsNode : public MKLDNNNode { +public: + MKLDNNGatherElementsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNGatherElementsNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + const size_t dataIndex_ = 0; + const size_t indicesIndex_ = 1; + + size_t axis_; + size_t dataTypeSize_; + int strideAxDst_; + int dstAxDim_; + int strideAx1Diff_; + std::string errorPrefix_; + + template + void directExecution(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp new file mode 100644 index 00000000000..3e858dd309d --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp @@ -0,0 +1,218 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "mkldnn_gather_nd_node.h" +#include +#include +#include +#include "common/cpu_memcpy.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNGatherNDNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto gatherElementsOp = ngraph::as_type_ptr(op); + if (!gatherElementsOp) { + errorMessage = "Node is not an instance of the GatherND operation from operation set v5."; + return false; + } + } catch (...) 
{ + return false; + } + + return true; +} + +MKLDNNGatherNDNode::MKLDNNGatherNDNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + _errorPrefix = std::string("Layer GatherND with name '") + op->get_friendly_name() + "'"; + + if (op->get_input_size() != 2 || op->get_output_size() != 1) + IE_THROW() << _errorPrefix << " has invalid number of input/output edges."; + + const auto& dataDims = op->get_input_shape(_dataIndex); + const auto& indicesDims = op->get_input_shape(_indicesIndex); + + auto gatherNdOp = ngraph::as_type_ptr(op); + _batchDims = gatherNdOp->get_batch_dims(); + if (_batchDims >= std::min(dataDims.size(), indicesDims.size())) + IE_THROW() << _errorPrefix << " has invalid batch_dims attribute: " << _batchDims; + + _batchNum = 1lu; + for (size_t i = 0; i < _batchDims; i++) { + _batchNum *= indicesDims[i]; + } + + _sliceRank = indicesDims[indicesDims.size() - 1]; + _dataRank = dataDims.size() - _batchDims; + if (_sliceRank > _dataRank) + IE_THROW() << _errorPrefix << " has invalid inputs shapes."; + + _blockSize = 1; + for (size_t i = _sliceRank + _batchDims; i < dataDims.size(); i++) { + _blockSize *= dataDims[i]; + } + _batchStep = 1; + for (size_t i = _batchDims; i < dataDims.size(); i++) { + _batchStep *= dataDims[i]; + } +} + +void MKLDNNGatherNDNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + Precision inDataPrecision = getOriginalInputPrecisionAtPort(_dataIndex); + if (!MKLDNNPlugin::one_of(inDataPrecision.size(), + sizeof(PrecisionTrait::value_type), + sizeof(PrecisionTrait::value_type), + sizeof(PrecisionTrait::value_type))) { + IE_THROW() << _errorPrefix << " has unsupported 'data' input precision: " << inDataPrecision; + } + + Precision indicesPrecision = getOriginalInputPrecisionAtPort(_indicesIndex); + if (!MKLDNNPlugin::one_of(indicesPrecision, + Precision::I32, Precision::I64, Precision::I16, Precision::U16, Precision::I8, Precision::U8)) { + IE_THROW() << _errorPrefix << " has unsupported 'indices' input precision: " << indicesPrecision; + } + + _dataTypeSize = inDataPrecision.size(); + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, inDataPrecision}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, + impl_desc_type::ref_any); +} + +template +void MKLDNNGatherNDNode::gatherElementwise() { + const auto *srcData = reinterpret_cast(getParentEdgeAt(_dataIndex)->getMemoryPtr()->GetPtr()); + const auto *indices = reinterpret_cast(getParentEdgeAt(_indicesIndex)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + auto strides = getParentEdgeAt(_dataIndex)->getDesc().getBlockingDesc().getStrides(); + const size_t* srcMultipliers = strides.data() + _batchDims; + + const size_t cycles = getChildEdgeAt(0)->getBlob()->byteSize() / (sizeof(dataType) * _batchNum); + const size_t CS = cycles * _sliceRank; + const size_t CB = cycles * _blockSize; + const size_t workAmount = _batchNum * cycles; + + auto threadBody = [&](const int ithr, const int nthr) { + size_t start(0lu), end(0lu); + splitter(workAmount, nthr, ithr, start, end); + if (start >= end) + return; + size_t bStart = start / cycles; + size_t cStart = start % cycles; + size_t workCounter = start; + + const dataType* shiftedSrcData = srcData + bStart 
* _batchStep; + const int* shiftedIndices = indices + bStart * CS + cStart * _sliceRank; + dataType* shiftedDstData = dstData + bStart * CB + cStart * _blockSize; + + for (size_t b = bStart; b < _batchNum; b++) { + for (size_t j = cStart; j < cycles; j++) { + size_t dataIdx = 0lu; + for (size_t i = 0lu; i < _sliceRank; i++) + dataIdx += srcMultipliers[i] * shiftedIndices[i]; + shiftedDstData[0] = shiftedSrcData[dataIdx]; + shiftedDstData++; + shiftedIndices += _sliceRank; + if (++workCounter == end) { + return; + } + } + cStart = 0lu; + shiftedSrcData += _batchStep; + } + }; + + parallel_nt(0, threadBody); +} + +void MKLDNNGatherNDNode::gatherBlocks() { + const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(_dataIndex)->getMemoryPtr()->GetPtr()); + const int* indices = reinterpret_cast(getParentEdgeAt(_indicesIndex)->getMemoryPtr()->GetPtr()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + std::vector srcMultipliers(_sliceRank); + for (size_t i = 0; i < _sliceRank ; i++) + srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getDesc().getBlockingDesc().getStrides()[i + _batchDims]; + + const size_t batchStep = _batchStep * _dataTypeSize; + const size_t dataStep = _blockSize * _dataTypeSize; + const size_t cycles = getChildEdgeAt(0)->getBlob()->byteSize() / (dataStep * _batchNum); + const size_t CS = cycles * _sliceRank; + const size_t CB = cycles * dataStep; + const size_t workAmount = _batchNum * cycles; + + auto threadBody = [&](const int ithr, const int nthr) { + size_t start(0lu), end(0lu); + splitter(workAmount, nthr, ithr, start, end); + if (start >= end) + return; + size_t bStart = start / cycles; + size_t cStart = start % cycles; + size_t workCounter = start; + + const uint8_t* shiftedSrcData = srcData + bStart * batchStep; + const int* shiftedIndices = indices + bStart * CS + cStart * _sliceRank; + uint8_t* shiftedDstData = dstData + bStart * CB + cStart * dataStep; + + for (size_t b = bStart; b < _batchNum; b++) { + for (size_t j = cStart; j < cycles; j++) { + size_t dataIdx = 0lu; + for (size_t i = 0; i < _sliceRank ; i++) + dataIdx += srcMultipliers[i] * shiftedIndices[i]; + cpu_memcpy(shiftedDstData, &(shiftedSrcData[dataIdx]), dataStep); + shiftedDstData += dataStep; + shiftedIndices += _sliceRank; + if (++workCounter == end) { + return; + } + } + cStart = 0; + shiftedSrcData += batchStep; + } + }; + + parallel_nt(0, threadBody); +} + +void MKLDNNGatherNDNode::execute(mkldnn::stream strm) { + if (_blockSize > 1) { + gatherBlocks(); + } else { + switch (_dataTypeSize) { + case sizeof(PrecisionTrait::value_type): + gatherElementwise::value_type>(); + break; + case sizeof(PrecisionTrait::value_type): + gatherElementwise::value_type>(); + break; + case sizeof(PrecisionTrait::value_type): + gatherElementwise::value_type>(); + break; + default: + IE_THROW() << _errorPrefix + " has data input with unsupported precision: " + getOriginalInputPrecisionAtPort(_dataIndex).name(); + } + } +} + +bool MKLDNNGatherNDNode::created() const { + return getType() == GatherND; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNGatherNDNode, GatherND) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.h new file mode 100644 index 00000000000..253710333c1 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.h @@ -0,0 +1,45 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
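[Editor's aside - illustrative sketch, not part of the patch above or of the header that continues below.] The inner loops of gatherElementwise()/gatherBlocks() reduce every GatherND index tuple to a flat source offset, dataIdx = sum_i srcMultipliers[i] * indices[i], and then copy blockSize contiguous elements. A minimal standalone version of that arithmetic, assuming a row-major 2x3x4 tensor with no batch dimensions (all names and sizes here are made up for the example):

#include <cstddef>
#include <cstring>
#include <iostream>
#include <vector>

int main() {
    // Toy data tensor of shape {2, 3, 4}, laid out row-major as 0..23.
    std::vector<float> data(24);
    for (size_t i = 0; i < data.size(); ++i)
        data[i] = static_cast<float>(i);

    const size_t strides[] = {12, 4, 1}; // row-major strides of {2, 3, 4}
    const size_t sliceRank = 2;          // each index tuple has two coordinates
    const size_t blockSize = 4;          // trailing elements copied per gathered slice

    const int indices[] = {1, 2};        // gather data[1][2][:]

    // Same reduction as in the node: dataIdx = sum_i strides[i] * indices[i].
    size_t dataIdx = 0;
    for (size_t i = 0; i < sliceRank; ++i)
        dataIdx += strides[i] * static_cast<size_t>(indices[i]);

    // gatherBlocks() does this with cpu_memcpy on byte offsets; memcpy is the
    // plain-C++ equivalent for this sketch.
    std::vector<float> out(blockSize);
    std::memcpy(out.data(), data.data() + dataIdx, blockSize * sizeof(float));

    for (float v : out)
        std::cout << v << ' ';           // prints: 20 21 22 23
    std::cout << '\n';
}

With batch dimensions present, the node additionally skips the first _batchDims strides (srcMultipliers = strides.data() + _batchDims) and advances the source pointer by _batchStep per batch.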
+#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNGatherNDNode : public MKLDNNNode { +public: + MKLDNNGatherNDNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNGatherNDNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + size_t _dataRank; + size_t _sliceRank; + size_t _blockSize; + size_t _batchDims; + size_t _batchNum; + size_t _batchStep; + size_t _dataTypeSize; + const size_t _dataIndex = 0; + const size_t _indicesIndex = 1; + std::string _errorPrefix; + + template + void gatherElementwise(); + void gatherBlocks(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp new file mode 100644 index 00000000000..966fc4003c3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp @@ -0,0 +1,137 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "mkldnn_gather_node.h" +#include +#include +#include "common/cpu_memcpy.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto gatherOp = ngraph::as_type_ptr(op); + if (!gatherOp) { + errorMessage = "Only opset1 Gather operation is supported"; + return false; + } + + auto axesOp = gatherOp->get_input_node_shared_ptr(GATHER_AXIS); + if (!ngraph::as_type_ptr(axesOp)) { + errorMessage = "Only Constant operation on 'axis' input is supported"; + return false; + } + } catch (...) 
{ + return false; + } + + return true; +} + +MKLDNNGatherNode::MKLDNNGatherNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + errorPrefix_ = std::string("Layer Gather with name '") + op->get_friendly_name() + "' "; + + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + auto gatherOp = ngraph::as_type_ptr(op); + if (gatherOp->get_input_size() != 3 || gatherOp->get_output_size() != 1) + IE_THROW() << errorPrefix_ << "has incorrect number of input/output edges!"; + + const SizeVector& dictionary_dims = gatherOp->get_input_shape(GATHER_DICTIONARY); + if (dictionary_dims.size() == 0) + IE_THROW() << errorPrefix_ << "has incorrect input parameters dimension!"; + + axis = static_cast(gatherOp->get_axis()); + if (axis < 0) + axis += dictionary_dims.size(); + // Dictionary must be at least rank axis + 1 + if (!(-static_cast(dictionary_dims.size()) <= axis && axis < static_cast(dictionary_dims.size()))) + IE_THROW() << errorPrefix_ << "has incorrect input parameters dimensions and axis number!"; + + // Find number of dictionaries, index range and data length + for (int i = 0; i < axis; i++) + numDictionaries *= dictionary_dims[i]; + indexRange = dictionary_dims[axis]; + for (size_t i = axis + 1; i < dictionary_dims.size(); i++) + dataLength *= dictionary_dims[i]; + + if (dataLength == 0) + IE_THROW() << errorPrefix_ << "had incorrect input parameters dimension!"; +} + +void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + Precision inIdxPrecision = getOriginalInputPrecisionAtPort(GATHER_INDEXES); + if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16) + inIdxPrecision = Precision::I32; + + Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DICTIONARY); + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, + {TensorDescCreatorTypes::ncsp, inIdxPrecision}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + impl_desc_type::ref_any); +} + +template +void MKLDNNGatherNode::gather() { + size_t src_indexSize = getParentEdgeAt(GATHER_INDEXES)->getBlob()->size(); + size_t outputSize = getChildEdgeAt(0)->getBlob()->byteSize(); + const auto *src_index = reinterpret_cast(getParentEdgeAt(GATHER_INDEXES)->getMemoryPtr()->GetPtr()); + const auto *src_dataDict = reinterpret_cast(getParentEdgeAt(GATHER_DICTIONARY)->getMemoryPtr()->GetPtr()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + size_t len = dataLength * getParentEdgeAt(GATHER_DICTIONARY)->getDesc().getPrecision().size(); + + parallel_for(src_indexSize, [&](size_t i) { + unsigned int idx = Conversion()(src_index[i]); + + // Index clipping + if (idx < indexRange) { + // Copying data to destination from Dictionary + for (size_t j = 0; j < numDictionaries; j++) { + cpu_memcpy_s(&dst_data[len * (i + j * src_indexSize)], + outputSize - (len * (i + j * src_indexSize)), + &src_dataDict[len * (idx + j * indexRange)], + len); + } + } else { + for (size_t j = 0; j < numDictionaries; j++) { + memset(&dst_data[len * (i + j * src_indexSize)], 0, len); + } + } + }); +} + +void MKLDNNGatherNode::execute(mkldnn::stream strm) { + switch (getParentEdgeAt(GATHER_INDEXES)->getDesc().getPrecision()) { + case Precision::FP32: + gather(); + break; + case Precision::I32: + gather(); + 
break; + default: + return IE_THROW() << "Unsupported indices input precision"; + } +} + +bool MKLDNNGatherNode::created() const { + return getType() == Gather; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNGatherNode, Gather) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h new file mode 100644 index 00000000000..96ed3846d8b --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNGatherNode : public MKLDNNNode { +public: + MKLDNNGatherNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNGatherNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + struct f32toUi32 { + inline unsigned int operator()(const float value) { + return static_cast(value); + } + }; + + struct i32toUi32 { + inline unsigned int operator()(const int32_t value) { + return static_cast(value); + } + }; + + int axis = 0; + size_t numDictionaries = 1; + size_t indexRange = 0; + size_t dataLength = 1; + static const size_t GATHER_DICTIONARY = 0; + static const size_t GATHER_INDEXES = 1; + static const size_t GATHER_AXIS = 2; + + std::string errorPrefix_; + + template + void gather(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp index df7d9f4c37e..86f89ccea7c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp @@ -12,16 +12,13 @@ using namespace mkldnn; using namespace MKLDNNPlugin; -MKLDNNGenericNode::MKLDNNGenericNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) { - params = layer->params; - blobs = layer->blobs; +MKLDNNGenericNode::MKLDNNGenericNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache), ngraphOp(op) { } void MKLDNNGenericNode::getSupportedDescriptors() { if (!extFactory && impls.empty()) { - std::string type = getCnnLayer() ? 
getCnnLayer()->type : "Generic"; - IE_THROW() << "Cannot get generic primitive for layer: " << getName() << " with type: " << type; + IE_THROW() << "Cannot get generic primitive for layer: " << getName() << " with type: " << getTypeStr(); } } @@ -30,23 +27,6 @@ void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::ResponseDesc resp; - if (impls.empty()) { - if (!extFactory) - IE_THROW() << "Descriptor for generic primitive doesn't exist"; - - std::vector impls_no_exec; - - InferenceEngine::StatusCode rc = extFactory->getImplementations(impls_no_exec, &resp); - for (const auto& impl : impls_no_exec) { - if (auto exec_impl = std::dynamic_pointer_cast(impl)) { - impls.emplace_back(exec_impl); - } - } - if (rc != InferenceEngine::OK) { - IE_THROW() << resp.msg; - } - } - for (auto &impl : impls) { std::vector configs; auto rc = impl->getSupportedConfigurations(configs, &resp); @@ -84,16 +64,33 @@ bool MKLDNNGenericNode::created() const { } bool MKLDNNGenericNode::created(const MKLDNNExtensionManager::Ptr &extMgr) { - if (getCnnLayer() && extMgr) { + if (ngraphOp && extMgr) { // We should save extension manager in order to avoid situation when // it will destroyed before extensibility primitives - if (getCnnLayer()->getNode()) { - auto impl = extMgr->CreateImplementation(getCnnLayer()->getNode()); - if (auto execImpl = std::dynamic_pointer_cast(impl)) - impls.emplace_back(execImpl); - } + auto impl = extMgr->CreateImplementation(ngraphOp); + if (auto execImpl = std::dynamic_pointer_cast(impl)) + impls.emplace_back(execImpl); + if (impls.empty()) { - extFactory = extMgr->CreateExtensionFactory(getCnnLayer()); + extFactory = extMgr->CreateExtensionFactory(ngraphOp); + + if (!extFactory) + IE_THROW(NotImplemented); + + std::vector impls_no_exec; + InferenceEngine::ResponseDesc resp; + InferenceEngine::StatusCode rc = extFactory->getImplementations(impls_no_exec, &resp); + if (rc == InferenceEngine::NOT_IMPLEMENTED) { + IE_THROW(NotImplemented) << resp.msg; + } else if (rc != InferenceEngine::OK) { + IE_THROW() << resp.msg; + } + + for (const auto& impl : impls_no_exec) { + if (auto exec_impl = std::dynamic_pointer_cast(impl)) { + impls.emplace_back(exec_impl); + } + } } if (extFactory || !impls.empty()) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h index 98160351cca..f93b79c7852 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h @@ -16,7 +16,7 @@ namespace MKLDNNPlugin { class MKLDNNGenericNode : public MKLDNNNode { public: - MKLDNNGenericNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNGenericNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNGenericNode() = default; void getSupportedDescriptors() override; @@ -34,12 +34,11 @@ public: void execLayer(); void cleanup() override; - protected: InferenceEngine::ILayerImplFactory::Ptr extFactory; std::vector impls; - std::map params; - std::map blobs; + + const std::shared_ptr ngraphOp; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp index ea478185720..35f9e6867e0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp @@ -3,28 +3,68 @@ // #include "mkldnn_input_node.h" -#include "../mkldnn_extension_utils.h" +#include "mkldnn_extension_utils.h" #include #include #include +#include +#include +#include +#include #include "caseless.hpp" #include "common/cpu_memcpy.h" #include "common/cpu_convert.h" +#include "utils/cpu_utils.hpp" using namespace mkldnn; using namespace MKLDNNPlugin; -using namespace InferenceEngine::details; +using namespace InferenceEngine; +using namespace details; +using namespace ngraph::op; + +MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + if (!one_of(op->get_type_info(), + v0::Parameter::type_info, + v0::Constant::type_info, + v0::Result::type_info, + v3::ReadValue::type_info, + v6::ReadValue::type_info)) + IE_THROW(NotImplemented) << "CPU Input node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); -MKLDNNInputNode::MKLDNNInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) { constant = ConstantType::NoConst; - if (layer && CaselessEq()(layer->type, "const")) { + + auto constOp = ngraph::as_type_ptr(op); + if (constOp) { constant = ConstantType::Const; - if (layer->blobs.size() != 1 || getType() != Input || !layer->blobs.begin()->second) - IE_THROW() << "Incorrect const input " << getName(); - constBlob = layer->blobs.begin()->second; - } else { - constBlob = nullptr; + + auto dataPrecision = convertPrecision(op->get_element_type()); + + size_t shapeSize = ngraph::shape_size(op->get_shape()); + constexpr size_t byte_size{8}; + if (dataPrecision == Precision::BIN) { + shapeSize = (shapeSize + (byte_size - 1)) / byte_size; + } + + TensorDesc td(dataPrecision, {shapeSize}, Layout::C); + + auto blob = make_blob_with_precision(td, const_cast(constOp->get_data_ptr())); + blob->allocate(); + + constBlob = blob; + } +} + +MKLDNNInputNode::MKLDNNInputNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &prc, const std::string &name, + const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(type, name, eng, cache) { + constant = ConstantType::NoConst; + if (getType() == Input) { + outDims.emplace_back(dims); + addOriginalOutputPrecision(prc); + } else if (getType() == Output) { + inDims.emplace_back(dims); + addOriginalInputPrecision(prc); } } @@ -46,28 +86,38 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::LayerConfig config; + LayerConfig config; config.dynBatchSupport = true; if (getType() == Input || getType() == MemoryInput) { - precision = getCnnLayer()->outData[0]->getPrecision(); - if (precision == InferenceEngine::Precision::U16 || isMeanImage) { - precision = InferenceEngine::Precision::FP32; + precision = getOriginalOutputPrecisionAtPort(0); + if (precision == Precision::U16 || isMeanImage) { + precision = Precision::FP32; } - InferenceEngine::DataConfig dataConfig; + DataConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - auto mem_tdesc = MKLDNNMemoryDesc(getCnnLayer()->outData[0]->getTensorDesc()); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + auto mem_tdesc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); dataConfig.desc = mem_tdesc; 
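[Editor's aside - illustrative sketch, not part of the diff.] In the MKLDNNInputNode constructor above, Precision::BIN constants pack eight one-bit elements per byte, so the blob length becomes the ceiling of shapeSize / 8 via (shapeSize + (byte_size - 1)) / byte_size. A small standalone check of that rounding (the input values are arbitrary examples):

#include <cstddef>
#include <iostream>

// Bytes needed to hold 'bits' one-bit elements, mirroring the ceil-division
// used for Precision::BIN in the constructor above.
static size_t packedBinSize(size_t bits) {
    constexpr size_t byte_size = 8;
    return (bits + (byte_size - 1)) / byte_size;
}

int main() {
    std::cout << packedBinSize(1)  << '\n'; // 1
    std::cout << packedBinSize(8)  << '\n'; // 1
    std::cout << packedBinSize(9)  << '\n'; // 2
    std::cout << packedBinSize(50) << '\n'; // 7
}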
config.outConfs.push_back(dataConfig); + // ReadValue operation expects constant input + if (!getParentEdges().empty()) { + DataConfig inConfig; + inConfig.inPlace = -1; + inConfig.constant = true; + inConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); + config.inConfs.push_back(inConfig); + } } else if (getType() == Output) { - precision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (precision == InferenceEngine::Precision::U16) precision = InferenceEngine::Precision::FP32; - InferenceEngine::DataConfig dataConfig; + precision = getOriginalInputPrecisionAtPort(0); + if (precision == Precision::U16) precision = Precision::FP32; + DataConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - auto mem_tdesc = MKLDNNMemoryDesc(getCnnLayer()->insData[0].lock()->getTensorDesc()); + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + auto mem_tdesc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); dataConfig.desc = mem_tdesc; config.inConfs.push_back(dataConfig); } @@ -98,13 +148,13 @@ bool MKLDNNInputNode::created() const { } namespace { - bool isDefaultOrder(const InferenceEngine::SizeVector &order) { + bool isDefaultOrder(const SizeVector &order) { return std::is_sorted(order.begin(), order.end(), [](size_t a, size_t b) { return a + 1 == b; }); } - std::tuple isDefaultStrides(const InferenceEngine::SizeVector &strides, - const InferenceEngine::SizeVector &dims) { + std::tuple isDefaultStrides(const SizeVector &strides, + const SizeVector &dims) { if (strides.size() != dims.size()) return std::make_tuple(false, 0); @@ -119,7 +169,7 @@ namespace { return std::make_tuple(true, dim); } - bool isCompatibleTensors(const InferenceEngine::TensorDesc &lhs, const InferenceEngine::TensorDesc &rhs, + bool isCompatibleTensors(const TensorDesc &lhs, const TensorDesc &rhs, bool isNeedPrecValid = true) { auto const &lhsBlockingDesc = lhs.getBlockingDesc(); auto const &rhsBlockingDesc = rhs.getBlockingDesc(); @@ -144,14 +194,17 @@ void MKLDNNInputNode::execute(mkldnn::stream strm) { return; auto dstBlob = getChildEdgeAt(0)->getBlob(); + if (isEmptyTensorDesc(dstBlob->getTensorDesc()) || isEmptyTensorDesc(constBlob->getTensorDesc())) + return; + if (constBlob->getTensorDesc() == dstBlob->getTensorDesc() || isCompatibleTensors(constBlob->getTensorDesc(), dstBlob->getTensorDesc())) { const int8_t *srcData = constBlob->cbuffer().as(); int8_t *dstData = dstBlob->buffer(); cpu_memcpy_s(dstData, dstBlob->byteSize(), srcData, constBlob->byteSize()); - } else if (constBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::BIN || - dstBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::BIN) { + } else if (constBlob->getTensorDesc().getPrecision() == Precision::BIN || + dstBlob->getTensorDesc().getPrecision() == Precision::BIN) { size_t dstSize = dstBlob->size() / 8; if (constBlob->size() != dstSize) { IE_THROW() << "Incorrect blob sizes for node " << getName(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h index 7f7024371c2..fa1cc0c49c3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h @@ -12,7 +12,9 @@ namespace MKLDNNPlugin { class MKLDNNInputNode : public MKLDNNNode { public: - MKLDNNInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + 
MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNInputNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &prc, const std::string &name, + const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNInputNode() override = default; void getSupportedDescriptors() override; @@ -25,10 +27,14 @@ public: isMeanImage = true; } + const InferenceEngine::Blob::CPtr getConstBlob() const { + return constBlob; + } + private: InferenceEngine::Precision precision; - InferenceEngine::Blob::Ptr constBlob; + InferenceEngine::Blob::Ptr constBlob = nullptr; bool isMeanImage = false; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index 935bf3bae61..caedec83ee8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -4,15 +4,13 @@ #include "mkldnn_interpolate_node.h" -#include "mkldnn_quantize_node.h" -#include +#include "mkldnn_fake_quantize_node.h" #include "mkldnn_eltwise_node.h" #include #include #include #include #include -#include #include "ie_parallel.hpp" #include @@ -25,6 +23,9 @@ #include "utils/bfloat16.hpp" #include "emitters/jit_bf16_emitters.hpp" +#include +#include + using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -1600,27 +1601,6 @@ private: } }; -MKLDNNInterpolateNode::MKLDNNInterpolateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) { - std::string modeString = layer->GetParamAsString("mode"); - if (modeString == "nearest") { - mode = InterpolateMode::nearest; - } else if (modeString == "linear") { - size_t rank = layer->insData[0].lock()->getDims().size(); - if (rank < 5) { - mode = InterpolateMode::linear_onnx; - } else { - mode = InterpolateMode::linear; - } - } else if (modeString == "linear_onnx") { - mode = InterpolateMode::linear_onnx; - } else if (modeString == "cubic") { - mode = InterpolateMode::cubic; - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support interpolate mode:" << modeString; - } -} - // shapeND: n c d h w // blockND: ncdhw cdhw dhw hw w 1 // index : 0 1 2 3 4 5 @@ -1656,83 +1636,203 @@ SizeVector to5Dim(SizeVector casesDim) { return dim5; } -void MKLDNNInterpolateNode::getSupportedDescriptors() { - if (!descs.empty()) - return; +using ngInterpMode = ngraph::opset4::Interpolate::InterpolateMode; +using ngInterpCoordTransf = ngraph::opset4::Interpolate::CoordinateTransformMode; +using ngInterpNearMode = ngraph::opset4::Interpolate::NearestMode; +using ngInterpShapeCalcMode = ngraph::opset4::Interpolate::ShapeCalcMode; +bool MKLDNNInterpolateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto interp = std::dynamic_pointer_cast(op); + if (!interp) { + errorMessage = "Only opset4 Interpolate operation is supported"; + return false; + } + const auto &interpAttr = interp->get_attrs(); + const auto &interpMode = interpAttr.mode; + if (!one_of(interpMode, ngInterpMode::nearest, ngInterpMode::linear, ngInterpMode::linear_onnx, ngInterpMode::cubic)) { + errorMessage = "Does not support interpolate mode: " + ngraph::as_string(interpMode); + return false; + } + + const auto &interpCoordTransMode = 
interpAttr.coordinate_transformation_mode; + if (!one_of(interpCoordTransMode, ngInterpCoordTransf::half_pixel, ngInterpCoordTransf::pytorch_half_pixel, ngInterpCoordTransf::asymmetric, + ngInterpCoordTransf::tf_half_pixel_for_nn, ngInterpCoordTransf::align_corners)) { + errorMessage = "Does not support coordinate transformation mode: " + ngraph::as_string(interpCoordTransMode); + return false; + } + + if (interpMode == ngInterpMode::nearest) { + const auto &interpNearestMode = interpAttr.nearest_mode; + if (!one_of(interpNearestMode, ngInterpNearMode::round_prefer_floor, ngInterpNearMode::round_prefer_ceil, ngInterpNearMode::floor, + ngInterpNearMode::ceil, ngInterpNearMode::simple)) { + errorMessage = "Does not support nearest round mode: " + ngraph::as_string(interpNearestMode); + return false; + } + } + + const auto &interpShapeCalcMode = interpAttr.shape_calculation_mode; + if (!one_of(interpShapeCalcMode, ngInterpShapeCalcMode::scales, ngInterpShapeCalcMode::sizes)) { + errorMessage = "Does not support shape_calculation_mode: " + ngraph::as_string(interpShapeCalcMode); + return false; + } + + const size_t dataRank = interp->get_input_shape(DATA_ID).size(); + if (dataRank < 1 || dataRank > 5) { + errorMessage = "Does not support input tensor of rank : " + std::to_string(dataRank); + return false; + } + + if (dataRank == 5 && interpMode == ngInterpMode::cubic) { + errorMessage = "Doesn't support input tensor with rank: " + std::to_string(dataRank) + " for 'cubic' mode "; + return false; + } + + if (std::dynamic_pointer_cast(interp->get_input_node_shared_ptr(SCALES_ID)) == nullptr) { + errorMessage = "Only const 'scales' input is supported"; + return false; + } + + if (interp->get_input_size() > 3 && std::dynamic_pointer_cast(interp->get_input_node_shared_ptr(AXES_ID)) == nullptr) { + errorMessage = "Only const 'axes' input is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNInterpolateNode::MKLDNNInterpolateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Interpolate node with name '" + getName() + "'"; + + const auto interp = std::dynamic_pointer_cast(op); + + if (interp->get_input_size() != 3 && interp->get_input_size() != 4) + IE_THROW() << errorPrefix << " has incorrect number of input edges"; + if (interp->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of output edges"; + isAxesSpecified = interp->get_input_size() != 3; + + const auto &interpAttr = interp->get_attrs(); + + const size_t dataRank = interp->get_input_shape(DATA_ID).size(); + const auto &interpMode = interpAttr.mode; + if (interpMode == ngInterpMode::nearest) { + mode = InterpolateMode::nearest; + } else if (interpMode == ngInterpMode::linear) { + if (dataRank < 5) { + mode = InterpolateMode::linear_onnx; + } else { + mode = InterpolateMode::linear; + } + } else if (interpMode == ngInterpMode::linear_onnx) { + mode = InterpolateMode::linear_onnx; + } else if (interpMode == ngInterpMode::cubic) { + mode = InterpolateMode::cubic; + } + + switch (dataRank) { + case 1: + case 3: + spatialDimSize = 1; + break; + case 2: + case 4: + spatialDimSize = 2; + break; + case 5: + spatialDimSize = 3; + break; + } + + const auto &interpCoordTransMode = interpAttr.coordinate_transformation_mode; + if (interpCoordTransMode == ngInterpCoordTransf::half_pixel) { + coordTransMode = InterpolateCoordTransMode::half_pixel; + } else if (interpCoordTransMode == ngInterpCoordTransf::pytorch_half_pixel) { + coordTransMode = InterpolateCoordTransMode::pytorch_half_pixel; + } else if (interpCoordTransMode == ngInterpCoordTransf::asymmetric) { + coordTransMode = InterpolateCoordTransMode::asymmetric; + } else if (interpCoordTransMode == ngInterpCoordTransf::tf_half_pixel_for_nn) { + coordTransMode = InterpolateCoordTransMode::tf_half_pixel_for_nn; + } else if (interpCoordTransMode == ngInterpCoordTransf::align_corners) { + coordTransMode = InterpolateCoordTransMode::align_corners; + } + + if (mode == InterpolateMode::nearest) { + const auto &interpNearestMode = interpAttr.nearest_mode; + if (interpNearestMode == ngInterpNearMode::round_prefer_floor) { + nearestMode = InterpolateNearestMode::round_prefer_floor; + } else if (interpNearestMode == ngInterpNearMode::round_prefer_ceil) { + nearestMode = InterpolateNearestMode::round_prefer_ceil; + } else if (interpNearestMode == ngInterpNearMode::floor) { + nearestMode = InterpolateNearestMode::floor; + } else if (interpNearestMode == ngInterpNearMode::ceil) { + nearestMode = InterpolateNearestMode::ceil; + } else if (interpNearestMode == ngInterpNearMode::simple) { + nearestMode = InterpolateNearestMode::simple; + } + } else if (mode == InterpolateMode::cubic) { + cubeCoeff = static_cast(interpAttr.cube_coeff); + } + antialias = interpAttr.antialias; + + const auto &interpShapeCalcMode = interpAttr.shape_calculation_mode; + if (interpShapeCalcMode == ngInterpShapeCalcMode::scales) { + shapeCalcMode = InterpolateShapeCalcMode::scales; + } else if (interpShapeCalcMode == ngInterpShapeCalcMode::sizes) { + shapeCalcMode = InterpolateShapeCalcMode::sizes; + } + + if (interpAttr.pads_begin.empty()) { + padBegin.resize(dataRank, 0); + } else { + padBegin.resize(interpAttr.pads_begin.size()); + for (size_t i = 0; i < 
interpAttr.pads_begin.size(); i++) + padBegin[i] = static_cast(interpAttr.pads_begin[i]); + } + + if (interpAttr.pads_end.empty()) { + padEnd.resize(dataRank, 0); + } else { + padEnd.resize(interpAttr.pads_end.size()); + for (size_t i = 0; i < interpAttr.pads_end.size(); i++) + padEnd[i] = static_cast(interpAttr.pads_end[i]); + } + + scales = std::dynamic_pointer_cast(interp->get_input_node_shared_ptr(SCALES_ID))->cast_vector(); + + if (isAxesSpecified) { + axes = std::dynamic_pointer_cast(interp->get_input_node_shared_ptr(AXES_ID))->cast_vector(); + } else { + axes.resize(dataRank); + for (int i = 0; i < dataRank; i++) { + axes[i] = i; + } + } + + if (scales.size() != axes.size()) { + IE_THROW() << errorPrefix << " does not have the same number elements in scales as in axis."; + } + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} + +void MKLDNNInterpolateNode::getSupportedDescriptors() { if (getParentEdges().size() != 3 && getParentEdges().size() != 4) // data, target_shape, scale, axis(optional). - IE_THROW() << "Interpolate layer with name '" << getName() << "' has incorrect number of input edges"; - isAxesSpecified = (getParentEdges().size() == 3) ? false : true; + IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) - IE_THROW() << "Interpolate layer with name '" << getName() << "' has incorrect number of output edges"; + IE_THROW() << errorPrefix << " has incorrect number of output edges"; srcDim = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); int dataRank = srcDim.size(); - switch (dataRank) { - case 1: - case 3: - spatialDimSize = 1; - break; - case 2: - case 4: - spatialDimSize = 2; - break; - case 5: - if (mode != InterpolateMode::cubic) { - spatialDimSize = 3; - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << - "' of 'cubic' mode only support input tensor of 2 or 4 rank"; - } - break; - default: - IE_THROW() << "Interpolate layer with name '" << getName() << - "' does not support input tensor of rank :" << dataRank; - break; - } - - auto *layer = getCnnLayer().get(); - std::string modeString = layer->GetParamAsString("coordinate_transformation_mode", "half_pixel"); - if (modeString == "half_pixel") { - coordTransMode = InterpolateCoordTransMode::half_pixel; - } else if (modeString == "pytorch_half_pixel") { - coordTransMode = InterpolateCoordTransMode::pytorch_half_pixel; - } else if (modeString == "asymmetric") { - coordTransMode = InterpolateCoordTransMode::asymmetric; - } else if (modeString == "tf_half_pixel_for_nn") { - coordTransMode = InterpolateCoordTransMode::tf_half_pixel_for_nn; - } else if (modeString == "align_corners") { - coordTransMode = InterpolateCoordTransMode::align_corners; - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support coordinate transformation mode: " << modeString; - } - - if (mode == InterpolateMode::nearest) { - modeString = layer->GetParamAsString("nearest_mode", "round_prefer_floor"); - if (modeString == "round_prefer_floor") { - nearestMode = InterpolateNearestMode::round_prefer_floor; - } else if (modeString == "round_prefer_ceil") { - nearestMode = InterpolateNearestMode::round_prefer_ceil; - } else if (modeString == "floor") { - nearestMode = InterpolateNearestMode::floor; - } else if (modeString == "ceil") { - nearestMode = InterpolateNearestMode::ceil; - } else if (modeString == "simple") { - nearestMode = InterpolateNearestMode::simple; - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << 
"' does not support nearest round mode: " << modeString; - } - } else if (mode == InterpolateMode::cubic) { - cubeCoeff = layer->GetParamAsFloat("cube_coeff", -0.75); - } - antialias = layer->GetParamAsBool("antialias", false); - shapeInferMode = layer->GetParamAsString("shape_calculation_mode"); // get pad - std::vector defPad(dataRank, 0); - padBegin = layer->GetParamAsInts("pads_begin", defPad); - padEnd = layer->GetParamAsInts("pads_end", defPad); for (int i = 0; i < padBegin.size(); i++) { if (padBegin[i] != 0) { hasPad = true; @@ -1769,46 +1869,6 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { srcDimPad = srcDim; } dstDim = getChildEdgeAt(0)->getDims().ToSizeVector(); - - // extract const buffer - auto scalesLayer = getParentEdgesAtPort(SCALES_ID)[0]->getParent()->getCnnLayer(); - if (scalesLayer->type == "Const") { - auto scalesBlob = dynamic_cast*>(scalesLayer->blobs["custom"].get()); - auto scalesData = scalesBlob->buffer().as(); - int scalesLen = getParentEdgeAt(SCALES_ID)->getDims()[0]; - scales.resize(scalesLen); - for (int i = 0; i < scalesLen; i++) { - scales[i] = scalesData[i]; - } - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' only supports const 'scales' input."; - } - - if (isAxesSpecified) { - auto axesLayer = getParentEdgesAtPort(AXES_ID)[0]->getParent()->getCnnLayer(); - if (axesLayer->type == "Const") { - auto axesBlob = dynamic_cast*>(axesLayer->blobs["custom"].get()); - auto axesData = axesBlob->buffer().as(); - int axesLen = getParentEdgeAt(AXES_ID)->getDims()[0]; - axes.resize(axesLen); - for (int i = 0; i < axesLen; i++) { - axes[i] = axesData[i]; - } - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' only supports const 'axes' input."; - } - } else { - int dataRank = srcDim.size(); - axes.resize(dataRank); - for (int i = 0; i < dataRank; i++) { - axes[i] = i; - } - } - - if (scales.size() != axes.size()) { - IE_THROW() << "Interpolate layer with name '" << getName() << - "' does not have the same number elements in scales as in axis."; - } } void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { @@ -1817,7 +1877,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { setPostOps(attr, true); - Precision inputPrecision = getCnnLayer()->insData[DATA_ID].lock()->getPrecision(); + Precision inputPrecision = getOriginalInputPrecisionAtPort(DATA_ID); if ((inputPrecision != Precision::I8) && (inputPrecision != Precision::U8) && (inputPrecision != Precision::BF16)) { inputPrecision = Precision::FP32; } @@ -1827,10 +1887,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { Precision outputPrecision = inputPrecision; if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } + outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(DATA_ID); } if (!mayiuse(cpu::x64::sse41)) { @@ -1853,18 +1910,6 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { config.inConfs.resize(3); } config.outConfs.resize(1); - config.inConfs[DATA_ID].constant = false; - config.inConfs[TARGET_SHAPE_ID].constant = false; - config.inConfs[SCALES_ID].constant = false; - config.outConfs[0].constant = false; - config.inConfs[DATA_ID].inPlace = -1; - config.inConfs[TARGET_SHAPE_ID].inPlace = -1; - config.inConfs[SCALES_ID].inPlace = -1; - config.outConfs[0].inPlace = -1; - if (isAxesSpecified) { - 
config.inConfs[AXES_ID].constant = false; - config.inConfs[AXES_ID].inPlace = -1; - } auto targetShapeType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); auto scalesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::FP32); @@ -1931,18 +1976,18 @@ void MKLDNNInterpolateNode::createPrimitive() { if (getParentEdges().size() > 3) { auto &axesMemPtr = getParentEdgeAt(AXES_ID)->getMemoryPtr(); if (!axesMemPtr || !axesMemPtr->GetPrimitivePtr()) - IE_THROW() << "Interpolate layer with name '" << getName() << "' did not allocate axes memory"; + IE_THROW() << errorPrefix << " did not allocate axes memory"; } if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) - IE_THROW() << "Interpolate layer with name '" << getName() << "' did not allocate destination memory"; + IE_THROW() << errorPrefix << " did not allocate destination memory"; if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) - IE_THROW() << "Interpolate layer with name '" << getName() << "' did not allocate input memory"; + IE_THROW() << errorPrefix << " did not allocate input memory"; if (!tsMemPtr || !tsMemPtr->GetPrimitivePtr()) - IE_THROW() << "Interpolate layer with name '" << getName() << "' did not allocate target shape memory"; + IE_THROW() << errorPrefix << " did not allocate target shape memory"; if (!scaleMemPtr || !scaleMemPtr->GetPrimitivePtr()) - IE_THROW() << "Interpolate layer with name '" << getName() << "' did not allocate scales memory"; + IE_THROW() << errorPrefix << " did not allocate scales memory"; if (getSelectedPrimitiveDescriptor() == nullptr) - IE_THROW() << "Interpolate layer with name '" << getName() << "' did not set preferable primitive descriptor"; + IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_interpolate_config_params(); @@ -2016,7 +2061,7 @@ void MKLDNNInterpolateNode::createPrimitive() { break; } default: { - IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support interpolate mode:" << mode; + IE_THROW() << errorPrefix << " does not support interpolate mode:" << mode; break; } } @@ -2349,9 +2394,9 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe mkldnn::post_ops ops; for (auto &node : fusedWith) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode) { - quantizeNode->appendPostOps(ops); + auto* fakeQuantizeNode = dynamic_cast(node.get()); + if (fakeQuantizeNode) { + fakeQuantizeNode->appendPostOps(ops); continue; } @@ -2386,7 +2431,8 @@ std::vector MKLDNNInterpolateNode::getScales() { int axesRank = axes.size(); for (int i = 0; i < axesRank; i++) { int axis = axes[i]; - fullScales[axis] = (shapeInferMode == "scales") ? scales[i] : static_cast(dstDim[axis]) / static_cast(srcDimPad[axis]); + fullScales[axis] = (shapeCalcMode == InterpolateShapeCalcMode::scales) ? 
scales[i] : + static_cast(dstDim[axis]) / static_cast(srcDimPad[axis]); } return fullScales; } @@ -3141,7 +3187,7 @@ inline float MKLDNNInterpolateNode::coordTransToInput(int outCoord, float scale, break; } default: { - IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support specified coordinate transformation mode"; + IE_THROW() << errorPrefix << " does not support specified coordinate transformation mode"; break; } } @@ -3175,41 +3221,18 @@ inline int MKLDNNInterpolateNode::nearestRound(float originCoord, bool isDownsam return static_cast(originCoord); } default: { - IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support specified nearest round mode"; + IE_THROW() << errorPrefix << " does not support specified nearest round mode"; break; } } } bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const { - auto isOneOf = [&](EltwiseOpType alg, std::vector algs) { - for (auto a : algs) { - if (alg == a) { - return true; - } - } - return false; - }; - if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { return false; } - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize node " << node->getName(); - return !quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << node->getName(); - return isOneOf(eltwiseNode->getOpType(), {Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, SoftRelu, - Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) || - (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2); - } - - return false; + return canFuseSimpleOperation(node); } bool MKLDNNInterpolateNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h index 369765538dc..5912ddaa5e8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h @@ -44,6 +44,11 @@ enum class InterpolateNearestMode { simple }; +enum class InterpolateShapeCalcMode { + sizes, + scales +}; + struct jit_interpolate_config_params { InterpolateLayoutType layout; InterpolateMode mode; @@ -85,7 +90,7 @@ struct jit_uni_interpolate_kernel { class MKLDNNInterpolateNode : public MKLDNNNode { public: - MKLDNNInterpolateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNInterpolateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNInterpolateNode() override = default; void getSupportedDescriptors() override; @@ -96,7 +101,9 @@ public: bool canBeInPlace() const override { return false; } - bool canFuse(const MKLDNNNodePtr& node) const; + bool canFuse(const MKLDNNNodePtr& node) const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: // nearest neighbor @@ -135,10 +142,10 @@ private: SizeVector getPaddedInputShape(); std::vector getScales(); - const size_t DATA_ID = 0; - const size_t TARGET_SHAPE_ID = 1; - const size_t SCALES_ID = 2; - const size_t AXES_ID = 3; + static const size_t DATA_ID = 0; + static const size_t TARGET_SHAPE_ID = 1; + static const size_t SCALES_ID = 2; + static const 
size_t AXES_ID = 3; const int LINEAR_KERNEL = 2; const int CUBIC_GRID_LEN = 4; @@ -149,6 +156,8 @@ private: std::vector padEnd; bool hasPad = false; InterpolateNearestMode nearestMode = InterpolateNearestMode::round_prefer_floor; + InterpolateShapeCalcMode shapeCalcMode; + float cubeCoeff = -0.75; bool isAxesSpecified = false; @@ -157,7 +166,6 @@ private: std::vector scales; // target shape is dst dim, full size. SizeVector dstDim; - std::string shapeInferMode; SizeVector srcDim; SizeVector srcDimPad; int spatialDimSize; @@ -173,6 +181,8 @@ private: std::vector indexTable; std::shared_ptr interpolateKernel = nullptr; + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp index b51e437923e..239837c8ab3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp @@ -3,40 +3,90 @@ // #include "mkldnn_lrn_node.h" - -#include #include #include +#include -using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNLrnNode::MKLDNNLrnNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +bool MKLDNNLrnNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto lrn = std::dynamic_pointer_cast(op); + if (!lrn) { + errorMessage = "Only opset1 LRN operation is supported"; + return false; + } + + const auto dataDims = lrn->get_input_shape(0); + if (dataDims.size() < 2 && dataDims.size() > 5) { + errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(dataDims.size()); + return false; + } + const auto axesNode = std::dynamic_pointer_cast(lrn->get_input_node_shared_ptr(1)); + if (!axesNode) { + errorMessage = "Only Constant operation on 'axis' input is supported"; + return false; + } + + const auto axes = axesNode->cast_vector(); + const auto dataRank = dataDims.size(); + if (axes.size() == 1 && axes[0] == 1) { + return true; + } else { + std::vector norm(dataRank, false); + for (auto &axis : axes) { + if (axis < 0 || axis >= dataRank) { + errorMessage = "Has incorrect reduction axis: " + std::to_string(axis); + return false; + } + norm[axis] = true; + } + + for (size_t i = 2; i < norm.size(); ++i) { + if (!norm[i]) { + errorMessage = "Supports only across channels or across spatial reduction"; + return false; + } + } + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNLrnNode::MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "LRN node with name '" + getName() + "'"; + + const auto lrn = std::dynamic_pointer_cast(op); + const auto axes = std::dynamic_pointer_cast(lrn->get_input_node_shared_ptr(1))->cast_vector(); + isAcrossMaps = (axes.size() == 1 && axes[0] == 1); + alpha = static_cast(lrn->get_alpha()); + beta = static_cast(lrn->get_beta()); + k = static_cast(lrn->get_bias()); + size = lrn->get_nsize(); + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNLrnNode::getSupportedDescriptors() { if (!descs.empty()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + + if (getParentEdges().size() != 2) + IE_THROW() << errorPrefix << " has incorrect number of input edges"; + if (getChildEdges().empty()) + IE_THROW() << errorPrefix << " has incorrect number of output edges"; + + InferenceEngine::Precision precision = getOriginalOutputPrecisionAtPort(0); if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto * lrnLayer = dynamic_cast(getCnnLayer().get()); - - if (lrnLayer == nullptr) - IE_THROW() << "Cannot convert lrn layer."; - - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - isAcrossMaps = lrnLayer->_isAcrossMaps; - alpha = lrnLayer->_alpha; - beta = lrnLayer->_beta; - size = lrnLayer->_size; - k = lrnLayer->_k; auto parentDims = getParentEdgeAt(0)->getDims(); @@ -46,13 +96,23 @@ void MKLDNNLrnNode::getSupportedDescriptors() { } } +MKLDNNMemoryDesc MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + if (idx > 0) { + return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(getOriginalInputPrecisions()[idx], + getParentEdgeAt(idx)->getDims().ToSizeVector(), + TensorDesc::getLayoutByDims(getParentEdgeAt(idx)->getDims().ToSizeVector()))); + } else { + return MKLDNNNode::getSrcMemDesc(primitive_desc_it, idx); + } +} + void MKLDNNLrnNode::createPrimitive() { if (prim) return; - auto prim_desc = createPrimitiveDescriptor(); + auto prim_desc = createPrimitiveDescriptor(); - prim.reset(new lrn_forward(prim_desc)); + prim.reset(new mkldnn::lrn_forward(prim_desc)); auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); @@ -63,36 +123,13 @@ bool MKLDNNLrnNode::created() const { return getType() == Lrn; } -void MKLDNNLrnNode::initOptimalPrimitiveDescriptor() { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; - - if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || - (!isUninitTensorDesc(config.inConfs[0].desc) && - !isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc)) - IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; - - if 
(!isUninitTensorDesc(config.inConfs[0].desc)) { - config.outConfs[0].desc = config.inConfs[0].desc; - } else if (!isUninitTensorDesc(config.outConfs[0].desc)) { - config.inConfs[0].desc = config.outConfs[0].desc; - } else { - config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0); - } - - initDescriptor(config); -} - void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { - algorithm alg = (isAcrossMaps) ? algorithm::lrn_across_channels : algorithm::lrn_within_channel; + mkldnn::algorithm alg = isAcrossMaps ? mkldnn::algorithm::lrn_across_channels : mkldnn::algorithm::lrn_within_channel; MKLDNNMemoryDesc in_candidate(inputDesc[0]); - MKLDNNDescriptor desc(std::shared_ptr( - new lrn_forward::desc(prop_kind::forward_scoring, alg, in_candidate, size, alpha, beta, k))); + MKLDNNDescriptor desc(std::shared_ptr( + new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, in_candidate, size, alpha, beta, k))); descs.push_back(desc); } + REG_MKLDNN_PRIM_FOR(MKLDNNLrnNode, Lrn); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h index 68dc087ab59..461eddfead8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h @@ -14,25 +14,32 @@ namespace MKLDNNPlugin { class MKLDNNLrnNode : public MKLDNNNode { public: - MKLDNNLrnNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNLrnNode() override = default; void getSupportedDescriptors() override; - void initOptimalPrimitiveDescriptor() override; void createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) override; + size_t descInputNumbers(MKLDNNDescriptor desc) override { + return static_cast(getOriginalInputsNumber()); + } + MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { return false; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: bool isAcrossMaps = false; - int size = 1; + size_t size = 1; int k = 1; float alpha = 1.0f; float beta = 1.0f; + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp similarity index 64% rename from inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp rename to inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index 9bf971f3190..b7f2c0a4277 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -2,8 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "mkldnn_gemm_node.h" -#include +#include "mkldnn_matmul_node.h" #include #include #include @@ -13,43 +12,70 @@ #include #include "ie_parallel.hpp" #include "common/cpu_memcpy.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNGemmNode::MKLDNNGemmNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +bool 
MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto matMul = std::dynamic_pointer_cast(op); + if (!matMul) { + errorMessage = "Only opset1 MatMul operation is supported"; + return false; + } -void MKLDNNGemmNode::getSupportedDescriptors() { - auto* gemmLayer = dynamic_cast(getCnnLayer().get()); + const auto shapeA = matMul->get_input_shape(0); + const auto shapeB = matMul->get_input_shape(1); - if (gemmLayer == nullptr) - IE_THROW() << "Cannot convert gemm layer."; + for (size_t i = 0; i < matMul->get_input_size(); i++) { + const auto inShapeRank = matMul->get_input_shape(i).size(); + if (inShapeRank < 2 || inShapeRank > 4) { + errorMessage = "Unsupported rank: " + std::to_string(inShapeRank) + " on " + std::to_string(i) + " input"; + return false; + } + } - if (getParentEdges().size() != 2 && getParentEdges().size() != 3) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); + const auto outShapeRank = matMul->get_shape().size(); + if (outShapeRank < 2 || outShapeRank > 4) { + errorMessage = "Unsupported rank: " + std::to_string(outShapeRank) + " on output"; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNMatMulNode::MKLDNNMatMulNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Gemm node with name '" + getName() + "'"; + + const auto matMul = std::dynamic_pointer_cast(op); + alpha = 1; + beta = 1; + transposeA = matMul->get_transpose_a(); + transposeB = matMul->get_transpose_b(); + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} + +void MKLDNNMatMulNode::getSupportedDescriptors() { + if (getParentEdges().size() != 2) + IE_THROW() << errorPrefix << " has incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); + IE_THROW() << errorPrefix << " has incorrect number of output edges for layer " << getName(); auto inDims0 = getParentEdgeAt(0)->getDims(); auto inDims1 = getParentEdgeAt(1)->getDims(); auto outDims = getChildEdgeAt(0)->getDims(); - alpha = gemmLayer->alpha; - beta = gemmLayer->beta; - transposeA = gemmLayer->transpose_a; - transposeB = gemmLayer->transpose_b; - - if ((inDims0.ndims() < 2 || inDims0.ndims() > 4) || - (inDims1.ndims() < 2 || inDims1.ndims() > 4)) - IE_THROW() << "Unsupported input dims count for layer " << getName(); - - if (outDims.ndims() < 2 || outDims.ndims() > 4) - IE_THROW() << "Unsupported output dims count for layer " << getName(); - if (inDims0.ndims() != inDims1.ndims() || inDims0.ndims() != outDims.ndims()) - IE_THROW() << "Invalid dims count for layer " << getName(); + IE_THROW() << errorPrefix << " has invalid dims count"; int nDims = inDims0.ndims(); xAxis = nDims - 1; @@ -62,39 +88,12 @@ void MKLDNNGemmNode::getSupportedDescriptors() { // The check inDims0[xAxis] != inDims1[yAxis] is correct due to layer semantic // coverity[copy_paste_error] if (inDims0[xAxis0] != inDims1[yAxis1] || inDims0[yAxis0] != outDims[yAxis] || inDims1[xAxis1] != outDims[xAxis]) - IE_THROW() << "Spatial input and output dimensions are incorrect for layer " << getName(); - - isThreeInputs = getParentEdges().size() == 3; - - if (isThreeInputs) { - auto inDims2 = getParentEdgeAt(2)->getDims(); - - if (inDims2.ndims() < 2 || inDims2.ndims() > 4) - IE_THROW() << 
"Unsupported output dims count for layer " << getName(); - - if (inDims2.ndims() != outDims.ndims()) - IE_THROW() << "Invalid dims count for layer " << getName(); - - if (inDims2[yAxis] != outDims[yAxis] || inDims2[xAxis] != outDims[xAxis]) - IE_THROW() << "Spatial input and output dimensions are incorrect for layer " << getName(); - } + IE_THROW() << errorPrefix << " has incorrect spatial input and output dimensions"; for (int dim_idx = nDims - 3; dim_idx >= 0; dim_idx--) { - if (isThreeInputs) { - auto inDims2 = getParentEdgeAt(2)->getDims(); - - if (inDims2[dim_idx] != outDims[dim_idx] && inDims2[dim_idx] != 1) - IE_THROW() << "Input batch dimensions are incorrect for layer " << getName(); - - int cOffset = 1; - for (int i = dim_idx + 1; i < nDims; i++) - cOffset *= inDims2[i]; - cOffsets.push_back(inDims2[dim_idx] == outDims[dim_idx] ? cOffset : 0); - } - if ((inDims0[dim_idx] != outDims[dim_idx] && inDims0[dim_idx] != 1) || (inDims1[dim_idx] != outDims[dim_idx] && inDims1[dim_idx] != 1)) { - IE_THROW() << "Input batch dimensions are incorrect for layer " << getName(); + IE_THROW() << errorPrefix << " has incorrect input batch dimensions"; } int aOffset = 1; @@ -116,13 +115,13 @@ void MKLDNNGemmNode::getSupportedDescriptors() { cOffsets.push_back(0); } -void MKLDNNGemmNode::initSupportedPrimitiveDescriptors() { +void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto inPrec0 = getCnnLayer()->insData[0].lock()->getPrecision(); - auto inPrec1 = getCnnLayer()->insData[1].lock()->getPrecision(); - if ((inPrec0 != Precision::U8 && inPrec0 != Precision::I8) || inPrec1 != Precision::I8 || isThreeInputs) { + auto inPrec0 = getOriginalInputPrecisionAtPort(0); + auto inPrec1 = getOriginalInputPrecisionAtPort(1); + if ((inPrec0 != Precision::U8 && inPrec0 != Precision::I8) || inPrec1 != Precision::I8) { if (inPrec0 == Precision::BF16 || inPrec1 == Precision::BF16) { inPrec0 = Precision::BF16; inPrec1 = Precision::BF16; @@ -149,20 +148,15 @@ void MKLDNNGemmNode::initSupportedPrimitiveDescriptors() { config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), inputDataType0)); config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), inputDataType1)); - if (isThreeInputs) { - auto inputDataType2 = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getDims(), inputDataType2)); - } - config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), outputDataType)); supportedPrimitiveDescriptors.push_back(PrimitiveDescInfo(config, impl_desc_type::gemm_any, MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()))); } -void MKLDNNGemmNode::initOptimalPrimitiveDescriptor() { +void MKLDNNMatMulNode::initOptimalPrimitiveDescriptor() { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; + IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; auto config = selected_pd->getConfig(); if (isInitConfig(config)) return; @@ -175,22 +169,16 @@ void MKLDNNGemmNode::initOptimalPrimitiveDescriptor() { } } -void MKLDNNGemmNode::createPrimitive() { +void MKLDNNMatMulNode::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& src0MemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto& src1MemPtr = getParentEdgeAt(1)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) - IE_THROW() << 
"Destination memory isn't allocated."; + IE_THROW() << errorPrefix << " did not allocate destination memory"; if (!src0MemPtr || !src0MemPtr->GetPrimitivePtr() || !src1MemPtr || !src1MemPtr->GetPrimitivePtr()) - IE_THROW() << "Input memory isn't allocated."; + IE_THROW() << errorPrefix << " did not allocate input memory"; if (getSelectedPrimitiveDescriptor() == nullptr) - IE_THROW() << "Preferable primitive descriptor isn't set."; - - if (isThreeInputs) { - auto& src2MemPtr = getParentEdgeAt(2)->getMemoryPtr(); - if (!src2MemPtr || !src2MemPtr->GetPrimitivePtr()) - IE_THROW() << "Input memory isn't allocated."; - } + IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; } inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const float *A, int lda, @@ -224,7 +212,7 @@ inline void process_gemm(char transa, char transb, int M, int N, int K, float al } template -void MKLDNNGemmNode::process_data() { +void MKLDNNMatMulNode::process_data() { auto inDims0 = getParentEdgeAt(0)->getDims(); auto inDims1 = getParentEdgeAt(1)->getDims(); auto outDims = getChildEdgeAt(0)->getDims(); @@ -250,30 +238,14 @@ void MKLDNNGemmNode::process_data() { int ldb = transposeB ? K : N; int ldc = N; - const float *src2_ptr; - if (isThreeInputs) { - auto& srcMemory2 = getParentEdgeAt(2)->getMemory(); - src2_ptr = reinterpret_cast(srcMemory2.GetPtr()); - } else { - src2_ptr = dst_ptr; - } - - if (!isThreeInputs) { - beta = 0.f; - } + beta = 0.f; for (int b1 = 0; b1 < MB1; b1++) { const T0 *a_ptr = src0_ptr; const T1 *b_ptr = src1_ptr; - const float *c_ptr = src2_ptr; float *d_ptr = dst_ptr; for (int b2 = 0; b2 < MB2; b2++) { - if (isThreeInputs) { - cpu_memcpy(d_ptr, c_ptr, M * N * sizeof(float)); - c_ptr += cOffsets[0]; - } - process_gemm(transa, transb, M, N, K, alpha, a_ptr, lda, b_ptr, ldb, beta, d_ptr, ldc); a_ptr += aOffsets[0]; @@ -284,14 +256,10 @@ void MKLDNNGemmNode::process_data() { src0_ptr += aOffsets[1]; src1_ptr += bOffsets[1]; dst_ptr += MB2 * M * N; - - if (isThreeInputs) { - src2_ptr += cOffsets[1]; - } } } -void MKLDNNGemmNode::execute(mkldnn::stream strm) { +void MKLDNNMatMulNode::execute(mkldnn::stream strm) { switch (getParentEdgeAt(0)->getDesc().getPrecision()) { case Precision::FP32: process_data(); @@ -306,22 +274,22 @@ void MKLDNNGemmNode::execute(mkldnn::stream strm) { process_data(); break; default: - IE_THROW() << "Gemm node: first input has unsupported precision"; + IE_THROW() << errorPrefix << " has incorrect precision on first input"; } } -bool MKLDNNGemmNode::created() const { - return getType() == Gemm; +bool MKLDNNMatMulNode::created() const { + return getType() == MatMul; } -int MKLDNNGemmNode::getMaxBatch() { +int MKLDNNMatMulNode::getMaxBatch() { if (!outDims.empty()) return outDims[0][0]; return 0; } -InferenceEngine::Precision MKLDNNGemmNode::getRuntimePrecision() const { +InferenceEngine::Precision MKLDNNMatMulNode::getRuntimePrecision() const { return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); } -REG_MKLDNN_PRIM_FOR(MKLDNNGemmNode, Gemm); +REG_MKLDNN_PRIM_FOR(MKLDNNMatMulNode, MatMul); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h similarity index 77% rename from inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h rename to inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h index 24c31bcddb5..71fbacd6e47 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h @@ -11,10 +11,10 @@ namespace MKLDNNPlugin { -class MKLDNNGemmNode : public MKLDNNNode { +class MKLDNNMatMulNode : public MKLDNNNode { public: - MKLDNNGemmNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNGemmNode() override = default; + MKLDNNMatMulNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNMatMulNode() override = default; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -26,6 +26,8 @@ public: InferenceEngine::Precision getRuntimePrecision() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: float alpha = 1.0f; float beta = 1.0f; @@ -35,13 +37,13 @@ private: int xAxis = 0; int yAxis = 0; - bool isThreeInputs = false; - std::vector aOffsets; std::vector bOffsets; std::vector cOffsets; template void process_data(); + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp index 895e2ccfc53..4aa0281a114 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp @@ -7,6 +7,7 @@ #include #include "mkldnn_memory_node.hpp" #include "common/cpu_memcpy.h" +#include "utils/general_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -14,8 +15,34 @@ using namespace InferenceEngine; std::mutex MKLDNNMemoryNodeVirtualEdge::holderMutex; -MKLDNNMemoryOutputNode::MKLDNNMemoryOutputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) , MKLDNNMemoryNode(layer) { +MKLDNNMemoryNode::MKLDNNMemoryNode(const std::shared_ptr& op) { + if (auto assignOp = std::dynamic_pointer_cast(op)) { + _id = assignOp->get_variable_id(); + } else if (auto readValueOp = std::dynamic_pointer_cast(op)) { + _id = readValueOp->get_variable_id(); + } +} + +bool MKLDNNMemoryOutputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!MKLDNNPlugin::one_of(op->get_type_info(), + ngraph::op::v3::Assign::type_info, + ngraph::op::v6::Assign::type_info)) { + errorMessage = "Node is not an instance of Assign from the operation set v3 or v6."; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNMemoryOutputNode::MKLDNNMemoryOutputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) , MKLDNNMemoryNode(op) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } if (created()) { holder = MKLDNNMemoryNodeVirtualEdge::registerOutput(this); } @@ -31,7 +58,7 @@ void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); InferenceEngine::LayerConfig config; config.dynBatchSupport = true; @@ -50,8 +77,26 @@ void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) { inputMemoryNode->storeState(srcMemory); } -MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNInputNode(layer, eng, cache), MKLDNNMemoryNode(layer), dataStore(new MKLDNNMemory{eng}) { +bool MKLDNNMemoryInputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!MKLDNNPlugin::one_of(op->get_type_info(), + ngraph::op::v3::ReadValue::type_info, + ngraph::op::v6::ReadValue::type_info)) { + errorMessage = "Node is not an instance of ReadValue from the operation set v3 or v6."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNInputNode(op, eng, cache), MKLDNNMemoryNode(op), dataStore(new MKLDNNMemory{eng}) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } if (created()) { holder = MKLDNNMemoryNodeVirtualEdge::registerInput(this); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp index fbc560f23d3..c7810ba1c9a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp @@ -18,11 +18,7 @@ class MKLDNNMemoryNode { std::string _id; public: explicit MKLDNNMemoryNode(std::string id) : _id(id) {} - explicit MKLDNNMemoryNode(InferenceEngine::CNNLayerPtr lp) { - if (lp->params.find("id") != lp->params.end()) { - _id = lp->GetParamAsString("id"); - } - } + explicit MKLDNNMemoryNode(const std::shared_ptr& op); virtual ~MKLDNNMemoryNode() = default; std::string getId() { return _id; @@ -61,8 +57,9 @@ class MKLDNNMemoryNodeVirtualEdge { class MKLDNNMemoryOutputNode : public MKLDNNNode, public MKLDNNMemoryNode { public: - MKLDNNMemoryOutputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNMemoryOutputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNMemoryOutputNode() override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void createPrimitive() override {} @@ -85,9 +82,10 @@ class MKLDNNMemoryOutputNode : public 
MKLDNNNode, public MKLDNNMemoryNode { class MKLDNNMemoryInputNode : public MKLDNNInputNode, public MKLDNNMemoryNode { public: - MKLDNNMemoryInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNMemoryInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNMemoryInputNode() override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; bool created() const override { return getType() == MemoryInput; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index 86f8dbc78fb..edbd4e426b1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -4,18 +4,15 @@ #include "mkldnn_mvn_node.h" -#include "mkldnn_quantize_node.h" -#include -#include "mkldnn_eltwise_node.h" -#include +#include #include #include -#include + +#include "mkldnn_fake_quantize_node.h" +#include "mkldnn_eltwise_node.h" #include #include "utils/bfloat16.hpp" -#include #include "ie_parallel.hpp" -#include #include "emitters/jit_load_store_emitters.hpp" #include "emitters/jit_bf16_emitters.hpp" @@ -605,44 +602,97 @@ private: }; ////////////////////////////////////////////////////////////////////////////////// -MKLDNNMVNNode::MKLDNNMVNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache), epsMode_(insideSqrt) {} +bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto& inDataShapeSize = op->input_value(0).get_shape().size(); + if (inDataShapeSize < 1 || inDataShapeSize > 5) { + errorMessage = "First input accepts ranks from 1 to 5. Actual: " + std::to_string(inDataShapeSize); + return false; + } + + if (auto mvnOp = ngraph::as_type_ptr(op)) { + auto axesOp = ngraph::as_type_ptr(mvnOp->get_input_node_shared_ptr(1)); + if (!axesOp) { + errorMessage = "Constant expected as the second input."; + return false; + } + + auto epsMode = mvnOp->get_eps_mode(); + if (epsMode != ngraph::op::MVNEpsMode::INSIDE_SQRT && + epsMode != ngraph::op::MVNEpsMode::OUTSIDE_SQRT) { + errorMessage = std::string("Just INSIDE_SQRT and OUTSIDE_SQRT epsilon mods are supported. Actual: ") + + std::to_string(static_cast(epsMode)); + return false; + } + // Validates MVN node axes to check whether it can be executed on the current CPU implementation. + // Supported cases: + // 1D: axes: [0] + // 2D: axes: [1] + // 3D: axes: [1,2], [2] + // 4D: axes: [1,2,3], [2,3] + // 5D: axes: [1,2,3,4], [2,3,4] + auto axesVal = axesOp->cast_vector(); + auto& mvnShape = mvnOp->get_output_shape(0); + for (int& axe : axesVal) + axe = axe < 0 ? 
axe + mvnShape.size() : axe; + std::sort(axesVal.begin(), axesVal.end()); + if (mvnShape.size() == 1) { + if (axesVal.size() != 1 || axesVal[0] != 0) { + errorMessage = "Unsupported axes."; + return false; + } + } else { + if (mvnShape.size() > 5 || (mvnShape.size() != axesVal.size() + 1 && mvnShape.size() != axesVal.size() + 2)) { + errorMessage = "Unsupported axes."; + return false; + } + int value = mvnShape.size() - 1; + for (int i = axesVal.size() - 1; i >= 0; i--, value--) { + if (axesVal[i] != value) { + errorMessage = "Unsupported axes."; + return false; + } + } + } + } else if (auto mvnOp = ngraph::as_type_ptr(op)) { + } else { + errorMessage = "Node is not an instance of the MVN operation."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + if (auto mvnOp = ngraph::as_type_ptr(op)) { + normalizeVariance_ = mvnOp->get_normalize_variance(); + epsValue_ = mvnOp->get_eps(); + epsMode_ = INSIDE_SQRT; + if (mvnOp->get_eps_mode() == ngraph::op::MVNEpsMode::OUTSIDE_SQRT) { + epsMode_ = OUTSIDE_SQRT; + } + + acrossChannels_ = false; + const auto& inDataShapeSize = op->input_value(0).get_shape().size(); + if (inDataShapeSize == mvnOp->input_value(1).get_shape()[0] + 1 || inDataShapeSize == 1) + acrossChannels_ = true; + } else if (auto mvnOp = ngraph::as_type_ptr(op)) { + normalizeVariance_ = mvnOp->get_normalize_variance(); + epsValue_ = mvnOp->get_eps(); + epsMode_ = INSIDE_SQRT; + acrossChannels_ = mvnOp->get_across_channels(); + } +} void MKLDNNMVNNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - - std::string errPrefix = "MVN node with name '" + getName() + "' "; - - auto cnnLayer = getCnnLayer(); - if (cnnLayer == nullptr) - IE_THROW() << errPrefix << "does not have CNN layer."; - - if (getParentEdges().size() > 2) - IE_THROW() << errPrefix << "has incorrect number of input edges."; - - if (getChildEdges().empty()) - IE_THROW() << errPrefix << "has incorrect number of output edges."; - - const auto& numOfDims = getParentEdgeAt(0)->getDims().ndims(); - if (numOfDims < 1 || numOfDims > 5) - IE_THROW() << errPrefix << "doesn't support input with size of dimensions: " << numOfDims; - - across_channels = false; - if (getParentEdges().size() == 1) { - across_channels = cnnLayer->GetParamAsBool("across_channels"); - } else { - if (numOfDims == getParentEdgeAt(1)->getDims().size() + 1 || numOfDims == 1) - across_channels = true; - } - normalize_variance = cnnLayer->GetParamAsBool("normalize_variance", true); - eps = cnnLayer->GetParamAsFloat("eps"); - auto epsMode = cnnLayer->GetParamAsString("eps_mode", ""); - if (details::CaselessEq()(epsMode, "inside_sqrt")) { - epsMode_ = insideSqrt; - } else if (details::CaselessEq()(epsMode, "outside_sqrt")) { - epsMode_ = outsideSqrt; - } } void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { @@ -651,24 +701,21 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { setPostOps(attr, true); - Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); + Precision inputPrecision = getOriginalInputPrecisionAtPort(0); if (getParentEdgeAt(0)->getDims().ndims() < 3 || getParentEdgeAt(0)->getDims().ndims() > 5 - || across_channels != 0 || normalize_variance != 1) { + || acrossChannels_ || !normalizeVariance_) { 
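        // For input ranks outside the 3D-5D range, for across-channel normalization, or when
        // variance normalization is disabled, a non-float input precision is promoted to FP32
        // below; these configurations are served by the planar FP32 reference path.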
if (!isFloatCompatible(inputPrecision)) { inputPrecision = Precision::FP32; } } - Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision(); + Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); if (!mayiuse(avx512_core)) { if (outputPrecision == Precision::BF16) outputPrecision = Precision::FP32; } if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } + outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } // ref with float planar and no fusion @@ -688,7 +735,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { (getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1) && !getParentEdgeAt(0)->getParent()->isConstant(); - const size_t inputsNum = getCnnLayer()->insData.size(); + const size_t inputsNum = getParentEdges().size(); InferenceEngine::LayerConfig config; config.dynBatchSupport = false; config.inConfs.resize(inputsNum); @@ -698,7 +745,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 0 : -1; if (inputsNum == 2) { - const auto& dims = getCnnLayer()->insData[1].lock()->getTensorDesc().getDims(); + const auto dims = getParentEdgeAt(1)->getDims().ToSizeVector(); config.inConfs[1].desc = TensorDesc(Precision::I32, dims, TensorDesc::getLayoutByDims(dims)); @@ -759,7 +806,7 @@ std::tuple MKLDNNMVNNode::get5dShapes(co case 3 : { shapes = std::make_tuple(dims[0], dims[1], 1, dims[2], 1); break; } case 4 : { shapes = std::make_tuple(dims[0], dims[1], 1, dims[2], dims[3]); break; } case 5 : { shapes = std::make_tuple(dims[0], dims[1], dims[2], dims[3], dims[4]); break; } - default : { IE_THROW() << "MVN layer with name '" << getCnnLayer()->name << "' doesn't support planar layout with rank: " << dims.size(); } + default : { IE_THROW() << "MVN layer with name '" << getName() << "' doesn't support planar layout with rank: " << dims.size(); } } return shapes; } @@ -781,8 +828,8 @@ void MKLDNNMVNNode::createPrimitive() { jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc)); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc)); jcp.planar_layout = MKLDNNMemory::GetPlainLayout(getChildEdgeAt(0)->getDims()) == selectedPD->getConfig().inConfs[0].desc.getLayout(); - jcp.normalize_variance = normalize_variance; - jcp.across_channels = across_channels; + jcp.normalize_variance = normalizeVariance_; + jcp.across_channels = acrossChannels_; SizeVector in_dims = getParentEdgeAt(0)->getDims().ToSizeVector(); int N = 0; std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = get5dShapes(in_dims); @@ -792,7 +839,7 @@ void MKLDNNMVNNode::createPrimitive() { jcp.normalize_variance = false; mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); - if (normalize_variance) { + if (normalizeVariance_) { jcp.normalize_variance = true; mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); } @@ -801,7 +848,7 @@ void MKLDNNMVNNode::createPrimitive() { jcp.normalize_variance = false; mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); - if (normalize_variance) { + if (normalizeVariance_) { jcp.normalize_variance = true; mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); } @@ -810,7 +857,7 @@ void MKLDNNMVNNode::createPrimitive() { 
jcp.normalize_variance = false; mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); - if (normalize_variance) { + if (normalizeVariance_) { jcp.normalize_variance = true; mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); } @@ -829,9 +876,9 @@ void MKLDNNMVNNode::createPrimitive() { void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { mkldnn::post_ops ops; for (auto &node : fusedWith) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode) { - quantizeNode->appendPostOps(ops); + auto* fakeQuantizeNode = dynamic_cast(node.get()); + if (fakeQuantizeNode) { + fakeQuantizeNode->appendPostOps(ops); continue; } @@ -854,8 +901,8 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { auto dim = getParentEdgeAt(0)->getDesc().getDims(); if (mayiuse(cpu::x64::sse41)) { - if (!mvn_mean_kernel || (normalize_variance && !mvn_variance_kernel) || !mvn_kernel) { - IE_THROW() << "MVN layer with name '" << getCnnLayer()->name << "' doesn't create kernel to execute on sse41 above platform."; + if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) { + IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform."; } Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); if (layout == C || layout == NC || layout == CHW || layout == NCHW || layout == NCDHW) { @@ -890,7 +937,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si for (size_t b = 0lu; b < N; b++) { size_t cb = b * C3; - if (across_channels) { + if (acrossChannels_) { // Calculate mean value for one instance in batch // Parallel sum for each channel float C3inv = 1.f / static_cast(C3); @@ -911,7 +958,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si // calculate variance value for one instance in batch // parallel sum for each channel - if (normalize_variance) { + if (normalizeVariance_) { float variance_temp = 0.0f; variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float { float variance_internal = 0.0f; @@ -927,10 +974,10 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si }); float variance = 1.f; - if (epsMode_ == insideSqrt) - variance /= sqrtf(variance_temp * C3inv + eps); - else if (epsMode_ == outsideSqrt) - variance /= sqrtf(variance_temp * C3inv) + eps; + if (epsMode_ == INSIDE_SQRT) + variance /= sqrtf(variance_temp * C3inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance /= sqrtf(variance_temp * C3inv) + epsValue_; // mvn for one instance in batch parallel_for(C, [&](int c) { size_t cc = cb + c * C2; @@ -979,17 +1026,17 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si mean *= C2inv; - if (normalize_variance) { + if (normalizeVariance_) { // variance for this channel float variance = 0.f; arg.mean = static_cast(&mean); arg.variance = static_cast(&variance); (*mvn_variance_kernel)(&arg); - if (epsMode_ == insideSqrt) - variance = 1.f / sqrtf(variance * C2inv + eps); - else if (epsMode_ == outsideSqrt) - variance = 1.f / (sqrtf(variance * C2inv) + eps); + if (epsMode_ == INSIDE_SQRT) + variance = 1.f / sqrtf(variance * C2inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance = 1.f / (sqrtf(variance * C2inv) + epsValue_); // mvn for this channel (*mvn_kernel)(&arg); @@ -1015,7 +1062,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si for (size_t b = 0lu; b < N; b++) { 
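        // Per batch instance the planar path works in passes: accumulate the mean, optionally
        // accumulate the variance, then scale (x - mean) by 1/sqrt(var + eps) for INSIDE_SQRT
        // or by 1/(sqrt(var) + eps) for OUTSIDE_SQRT, either over the whole instance
        // (acrossChannels_) or per channel.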
size_t cb = b * C3; - if (across_channels) { + if (acrossChannels_) { // Parallel sum for each channel for mean float C3inv = 1.f / static_cast(C3); float mean_temp = 0.0f; @@ -1031,7 +1078,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si float mean = mean_temp * C3inv; - if (normalize_variance) { + if (normalizeVariance_) { // parallel sum for each channel for variance float variance_temp = 0.0f; variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float { @@ -1044,10 +1091,10 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si }); float variance = 1.f; - if (epsMode_ == insideSqrt) - variance = 1.f / sqrtf(variance_temp * C3inv + eps); - else if (epsMode_ == outsideSqrt) - variance = 1.f / (sqrtf(variance_temp * C3inv) + eps); + if (epsMode_ == INSIDE_SQRT) + variance = 1.f / sqrtf(variance_temp * C3inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance = 1.f / (sqrtf(variance_temp * C3inv) + epsValue_); parallel_for(C, [&](int c) { size_t cc = cb + c * C2; @@ -1074,17 +1121,17 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si } mean *= C2inv; - if (normalize_variance) { + if (normalizeVariance_) { // variance for this channel float variance = 0.f; for (size_t sp = 0lu; sp < C2; sp++) { variance += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean); } - if (epsMode_ == insideSqrt) - variance = 1.f / sqrtf(variance * C2inv + eps); - else if (epsMode_ == outsideSqrt) - variance = 1.f / (sqrtf(variance * C2inv) + eps); + if (epsMode_ == INSIDE_SQRT) + variance = 1.f / sqrtf(variance * C2inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance = 1.f / (sqrtf(variance * C2inv) + epsValue_); // mvn for this channel for (size_t sp = 0lu; sp < C2; sp++) { @@ -1126,7 +1173,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si size_t C5 = C * D * H * W; size_t threads_num = parallel_get_num_threads(); - size_t aux_buffer_size = across_channels ? blk_size : rnd_up(C, blk_size); + size_t aux_buffer_size = acrossChannels_ ? blk_size : rnd_up(C, blk_size); std::vector mean_buffer(aux_buffer_size * threads_num); std::vector variance_buffer(aux_buffer_size * threads_num); @@ -1135,7 +1182,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si for (size_t b = 0lu; b < N; b++) { size_t b_offset = is_nhwc ? 
b * C5 : b * C3; - if (across_channels) { + if (acrossChannels_) { // mean for this instance in batch float C5inv = 1.f / static_cast(C5); float mean_temp = 0.0f; @@ -1172,7 +1219,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si }); float mean = mean_temp * C5inv; - if (normalize_variance) { + if (normalizeVariance_) { // variance: sum((x-mean)*(x-mean)) for one instance in batch float variance_temp = 0.0f; variance_temp = parallel_sum3d(CB, D, H, variance_temp, [&](size_t cb, size_t d, size_t h)->float { @@ -1200,10 +1247,10 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si }); float variance = 1.f; - if (epsMode_ == insideSqrt) - variance /= sqrtf(variance_temp * C5inv + eps); - else if (epsMode_ == outsideSqrt) - variance /= sqrtf(variance_temp * C5inv) + eps; + if (epsMode_ == INSIDE_SQRT) + variance /= sqrtf(variance_temp * C5inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance /= sqrtf(variance_temp * C5inv) + epsValue_; // mvn for one instance in batch parallel_for3d(CB, D, H, [&](size_t cb, size_t d, size_t h) { size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size @@ -1265,7 +1312,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si for (size_t c = 0; c < C; c++) mean_buffer[c] *= size_inv; - if (normalize_variance) { + if (normalizeVariance_) { for (int i = 0; i < variance_buffer.size(); i++) variance_buffer[i] = 0.f; @@ -1291,10 +1338,10 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si variance_buffer[c] += variance_buffer[c + aux_buffer_size * i]; } for (size_t c = 0; c < C; c++) { - if (epsMode_ == insideSqrt) - variance_buffer[c] = 1.f / sqrtf(variance_buffer[c] * size_inv + eps); - else if (epsMode_ == outsideSqrt) - variance_buffer[c] = 1.f / (sqrtf(variance_buffer[c] * size_inv) + eps); + if (epsMode_ == INSIDE_SQRT) + variance_buffer[c] = 1.f / sqrtf(variance_buffer[c] * size_inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance_buffer[c] = 1.f / (sqrtf(variance_buffer[c] * size_inv) + epsValue_); } parallel_for2d(D, H, [&](size_t d, size_t h) { @@ -1317,7 +1364,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si } }); } else { - // normalize_variance == false + // normalizeVariance_ == false parallel_for2d(D, H, [&](size_t d, size_t h) { for (size_t cb = 0; cb < CB; cb++) { size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size @@ -1340,39 +1387,12 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si } } -// Validates MVN node axes to check whether it can be executed on the current CPU implementation. -// Supported cases: -// 1D: axes: [0] -// 2D: axes: [1] -// 3D: axes: [1,2], [2] -// 4D: axes: [1,2,3], [2,3] -// 5D: axes: [1,2,3,4], [2,3,4] -bool MKLDNNMVNNode::checkAxesSuitability(const std::shared_ptr& node) { - const auto mvn = std::dynamic_pointer_cast(node); - if (mvn != nullptr && node->get_input_size() == 2) { - if (auto axesNode = dynamic_cast(mvn->get_input_node_ptr(1))) { - auto& mvnShape = mvn->get_output_shape(0); - auto axesVal = axesNode->cast_vector(); - for (int& axe : axesVal) - axe = axe < 0 ? 
axe + mvnShape.size() : axe; - std::sort(axesVal.begin(), axesVal.end()); - if (mvnShape.size() == 1) { - if (axesVal.size() == 1 && axesVal[0] == 0) - return true; - else - return false; - } - if (mvnShape.size() > 5 || (mvnShape.size() != axesVal.size() + 1 && mvnShape.size() != axesVal.size() + 2)) - return false; - int value = mvnShape.size() - 1; - for (int i = axesVal.size() - 1; i >= 0; i--, value--) { - if (axesVal[i] != value) - return false; - } - return true; - } +bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const { + if (!mayiuse(cpu::x64::sse41)) { + return false; } - return false; + + return canFuseSimpleOperation(node); } bool MKLDNNMVNNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h index b28daa3f7e3..c23da5e0c11 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -73,9 +72,10 @@ struct jit_uni_mvn_kernel { class MKLDNNMVNNode : public MKLDNNNode { public: - MKLDNNMVNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNMVNNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void createPrimitive() override; @@ -85,7 +85,15 @@ public: return false; } - static bool checkAxesSuitability(const std::shared_ptr&); + inline bool getAcrossChannels() const { + return acrossChannels_; + }; + + inline bool getNormalizeVariance() const { + return normalizeVariance_; + }; + + bool canFuse(const MKLDNNNodePtr& node) const override; private: void mvn_pln(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims); @@ -98,15 +106,15 @@ private: std::tuple get5dShapes(const InferenceEngine::SizeVector& dims); - bool across_channels = false; - bool normalize_variance = true; - float eps = 1e-9f; + bool acrossChannels_ = false; + bool normalizeVariance_ = true; + float epsValue_ = 1e-9f; // Defines way to add epsilon: inside sqrt or outside. 
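    // A minimal illustration of the two modes, assuming a scalar variance:
    //   INSIDE_SQRT:  coeff = 1.f / sqrtf(variance + epsValue_);
    //   OUTSIDE_SQRT: coeff = 1.f / (sqrtf(variance) + epsValue_);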
- enum epsType { - insideSqrt, - outsideSqrt + enum MVNEpsMode { + INSIDE_SQRT, + OUTSIDE_SQRT }; - epsType epsMode_; + MVNEpsMode epsMode_; InferenceEngine::Precision input_prec, output_prec; size_t src_data_size, dst_data_size; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index fc18003ebc3..ff95f416573 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -4,12 +4,13 @@ #include "mkldnn_normalize_node.h" -#include #include -#include "mkldnn_quantize_node.h" +#include "mkldnn_fake_quantize_node.h" #include "mkldnn_eltwise_node.h" #include "utils/bfloat16.hpp" +#include "utils/general_utils.h" +#include #include "emitters/jit_bf16_emitters.hpp" #include "mkldnn_extension_utils.h" #include @@ -19,6 +20,8 @@ #include "nodes/common/cpu_convert.h" #include +#include + using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -152,7 +155,7 @@ private: } }; -// dst = src * modulo_inv * scale +// dst = src * modulo_inv template struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_normalize_kernel_f32) @@ -188,8 +191,6 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji mov(reg_src, ptr[reg_params + GET_OFF(src)]); mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); - mov(reg_modulo, ptr[reg_params + GET_OFF(modulo)]); - mov(reg_weights, ptr[reg_params + GET_OFF(weights)]); mov(reg_fused_factor, ptr[reg_params + GET_OFF(fused_factor)]); mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); if (attr_.post_ops_.len() != 0) @@ -220,10 +221,8 @@ private: Xbyak::Reg64 reg_src = r8; Xbyak::Reg64 reg_dst = r9; - Xbyak::Reg64 reg_modulo = r10; - Xbyak::Reg64 reg_weights = r11; - Xbyak::Reg64 reg_fused_factor = r12; - Xbyak::Reg64 reg_work_amount = r15; + Xbyak::Reg64 reg_fused_factor = r10; + Xbyak::Reg64 reg_work_amount = r11; Xbyak::Reg64 reg_params = abi_param1; Reg8 reg_tmp_8 = r14b; @@ -258,10 +257,6 @@ private: inline void normalize_nchw() { if (jcp_.across_spatial) { uni_vbroadcastss(vmm_fused_factor, ptr[reg_fused_factor]); // for channel_shared: false or true. 
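            // With the per-channel scale weights removed, fused_factor carries only the precomputed
            // normalization coefficient: a single value broadcast once when across_spatial is set,
            // or a per-element buffer loaded dynamically in the branch below.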
- } else { - if (!jcp_.channel_shared) { - uni_vbroadcastss(vmm_scale, ptr[reg_weights]); - } } Xbyak::Label main_loop_label; @@ -279,16 +274,9 @@ private: if (jcp_.across_spatial) { uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); } else { - if (jcp_.channel_shared) { - uni_vmovups(vmm_fused_factor, ptr[reg_fused_factor]); - uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); - add(reg_fused_factor, vlen); - } else { - uni_vmovups(vmm_modulo, ptr[reg_modulo]); // modulo: ld dynamic - uni_vmulps(vmm_val, vmm_val, vmm_modulo); - uni_vmulps(vmm_val, vmm_val, vmm_scale); // weight: bc once - add(reg_modulo, vlen); - } + uni_vmovups(vmm_fused_factor, ptr[reg_fused_factor]); + uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); + add(reg_fused_factor, vlen); } if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, 1); @@ -313,16 +301,9 @@ private: if (jcp_.across_spatial) { uni_vmulps(xmm_val, xmm_val, xmm_fused_factor); } else { - if (jcp_.channel_shared) { - load_scalar(xmm_fused_factor, ptr[reg_fused_factor], memory::data_type::f32); - uni_vmulps(xmm_val, xmm_val, xmm_fused_factor); - add(reg_fused_factor, step * sizeof(float)); - } else { - load_scalar(xmm_modulo, ptr[reg_modulo], memory::data_type::f32); - uni_vmulps(xmm_val, xmm_val, xmm_modulo); - uni_vmulps(xmm_val, xmm_val, xmm_scale); - add(reg_modulo, step * sizeof(float)); - } + load_scalar(xmm_fused_factor, ptr[reg_fused_factor], memory::data_type::f32); + uni_vmulps(xmm_val, xmm_val, xmm_fused_factor); + add(reg_fused_factor, step * sizeof(float)); } if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, 1); // vector and boradcast @@ -339,13 +320,7 @@ private: } inline void normalize_nhwc() { - if (jcp_.channel_shared) { - uni_vbroadcastss(vmm_fused_factor, ptr[reg_fused_factor]); - } else { - if (!jcp_.across_spatial) { - uni_vbroadcastss(vmm_modulo, ptr[reg_modulo]); - } - } + uni_vbroadcastss(vmm_fused_factor, ptr[reg_fused_factor]); Xbyak::Label main_loop_label; Xbyak::Label main_loop_end_label; @@ -359,20 +334,8 @@ private: jl(main_loop_end_label, T_NEAR); load_vector(vmm_val, ptr[reg_src], jcp_.src_dt); - if (jcp_.channel_shared) { - uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); - } else { - if (jcp_.across_spatial) { - uni_vmovups(vmm_fused_factor, ptr[reg_fused_factor]); - uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); - add(reg_fused_factor, vlen); - } else { - uni_vmovups(vmm_scale, ptr[reg_weights]); - uni_vmulps(vmm_val, vmm_val, vmm_scale); - uni_vmulps(vmm_val, vmm_val, vmm_modulo); - add(reg_weights, vlen); - } - } + uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); + if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, 0); add(reg_oc_off, vlen); // out channel offset of fused ops weights in byte @@ -394,20 +357,8 @@ private: jl(tail_loop_end_label, T_NEAR); load_scalar(xmm_val, ptr[reg_src], jcp_.src_dt); - if (jcp_.channel_shared) { - uni_vmulps(xmm_val, xmm_val, xmm_fused_factor); - } else { - if (jcp_.across_spatial) { - load_scalar(xmm_fused_factor, ptr[reg_fused_factor], memory::data_type::f32); - uni_vmulps(xmm_val, xmm_val, xmm_fused_factor); - add(reg_fused_factor, step * sizeof(float)); - } else { - load_scalar(xmm_scale, ptr[reg_weights], memory::data_type::f32); - uni_vmulps(xmm_val, xmm_val, xmm_scale); - uni_vmulps(xmm_val, xmm_val, xmm_modulo); - add(reg_weights, step * sizeof(float)); - } - } + uni_vmulps(xmm_val, xmm_val, xmm_fused_factor); + if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, 0); add(reg_oc_off, step * sizeof(float)); @@ -438,14 +389,7 @@ private: bool 
is_sse42 = (isa == cpu::x64::sse41); if (jcp_.across_spatial) { - if (jcp_.channel_shared) { - uni_vbroadcastss(vmm_fused_factor, ptr[reg_fused_factor]); - } else { - uni_vmovups(vmm_fused_factor, ptr[reg_fused_factor]); - if (is_sse42) { - uni_vmovups(vmm_fused_factor2, ptr[reg_fused_factor + simd_w * sizeof(float)]); - } - } + uni_vbroadcastss(vmm_fused_factor, ptr[reg_fused_factor]); Xbyak::Label norm_loop_label; Xbyak::Label norm_loop_end_label; @@ -466,11 +410,7 @@ private: if (is_sse42) { int sse42_offset = 4; load_vector(vmm_val, ptr[reg_src + sse42_offset * jcp_.src_data_size], jcp_.src_dt); - if (jcp_.channel_shared) { - uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); // bc once - } else { - uni_vmulps(vmm_val, vmm_val, vmm_fused_factor2); // ld once - } + uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); // bc once if (attr_.post_ops_.len() != 0) { add(reg_oc_off, sse42_offset * sizeof(float)); apply_post_ops(jcp_.dst_dt, 0); @@ -486,11 +426,7 @@ private: } L(norm_loop_end_label); } else { // across_saptail is flase - if (jcp_.channel_shared) { - uni_vbroadcastss(vmm_fused_factor, ptr[reg_fused_factor]); - } else { - uni_vbroadcastss(vmm_modulo, ptr[reg_modulo]); - } + uni_vbroadcastss(vmm_fused_factor, ptr[reg_fused_factor]); size_t src_stride = jcp_.w * jcp_.h * blk_size * jcp_.src_data_size; size_t dst_stride = jcp_.w * jcp_.h * blk_size * jcp_.dst_data_size; @@ -503,14 +439,7 @@ private: jle(norm_loop_end_label, T_NEAR); load_vector(vmm_val, ptr[reg_src], jcp_.src_dt); - if (jcp_.channel_shared) { - uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); - } else { - uni_vmovups(vmm_scale, ptr[reg_weights]); - uni_vmulps(vmm_val, vmm_val, vmm_scale); - uni_vmulps(vmm_val, vmm_val, vmm_modulo); - add(reg_weights, vlen); - } + uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, 0); add(reg_oc_off, vlen); // vlen is related isa @@ -520,14 +449,7 @@ private: if (is_sse42) { int sse42_offset = 4; load_vector(vmm_val, ptr[reg_src + sse42_offset * jcp_.src_data_size], jcp_.src_dt); - if (jcp_.channel_shared) { - uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); // bc once - } else { - uni_vmovups(vmm_scale, ptr[reg_weights]); // ld dynamic - uni_vmulps(vmm_val, vmm_val, vmm_scale); - uni_vmulps(vmm_val, vmm_val, vmm_modulo); // bc once - add(reg_weights, vlen); // 4 * sizeof(float) - } + uni_vmulps(vmm_val, vmm_val, vmm_fused_factor); // bc once if (attr_.post_ops_.len() != 0) { apply_post_ops(jcp_.dst_dt, 0); add(reg_oc_off, vlen); // vlen is related isa @@ -721,87 +643,94 @@ private: } }; -MKLDNNNormalizeNode::MKLDNNNormalizeNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache), src_data_size(0lu), dst_data_size(0lu), weights_data_size(0lu), - input_prec(Precision::UNSPECIFIED), output_prec(Precision::UNSPECIFIED), weights_prec(Precision::UNSPECIFIED) {} - -void MKLDNNNormalizeNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - - std::string errPrefix = "Normalize node with name '" + getName() + "' "; - if (getParentEdges().size() != 1) - IE_THROW() << errPrefix << " has incorrect number of input edges: " << getParentEdges().size(); - if (getChildEdges().empty()) - IE_THROW() << errPrefix << " has incorrect number of output edges: " << getChildEdges().size(); - - if (getParentEdgeAt(0)->getDims().ndims() > 4 || getParentEdgeAt(0)->getDims().ndims() < 2) { - IE_THROW() << errPrefix << "has invalid input shape. 
Normalize supports from 2D to 4D blobs."; - } - - auto *layer = getCnnLayer().get(); - if (layer == nullptr) - IE_THROW() << errPrefix << " has nullable CnnLayer."; - across_spatial = layer->GetParamAsBool("across_spatial", false); - channel_shared = layer->GetParamAsBool("channel_shared", false); - eps = layer->GetParamAsFloat("eps"); - - MemoryBlob::Ptr tweights = as(layer->blobs.at("weights")); - if (!tweights) { - IE_THROW() << errPrefix << "has not initialized weights or they cannot be casted to MemoryBlob."; - } - - auto inData = getCnnLayer()->insData[0].lock(); - if (inData == nullptr) { - IE_THROW() << errPrefix << "has nullable input data."; - } - const auto& inDims = inData->getDims(); - if (inDims.size() < 2) - IE_THROW() << errPrefix << "has unsupported layout: '" << inData->getLayout() << "'."; - const size_t channels = inDims[1]; - const auto weightsSize = tweights->size(); - if (weightsSize != channels) { - if (weightsSize == 1) { - channel_shared = true; - } else { - IE_THROW() << errPrefix << "has unsupported broadcast type. Channels size: " << channels << "; Weights size: " << weightsSize; - } - } - - weights_prec = tweights->getTensorDesc().getPrecision(); - if (weights_prec != Precision::FP32 && weights_prec != Precision::BF16) { - // Unknown non supported data type, return an error - IE_THROW() << layer->name << "Weights for layer Normalize with name '" << layer->name << - "' has unsupported data type " << tweights->getTensorDesc().getPrecision(); - } - - TensorDesc td(Precision::FP32, tweights->getTensorDesc().getDims(), tweights->getTensorDesc().getLayout()); - weights_blob = make_shared_blob(td); - weights_blob->allocate(); - float* dst = weights_blob->wmap(); - if (weights_prec == Precision::FP32) { - float* src = layer->blobs.at("weights")->buffer(); - cpu_memcpy(dst, src, layer->blobs.at("weights")->byteSize()); - } else if (weights_prec == Precision::BF16) { - short* bf16src = tweights->rmap().as(); - cpu_convert(bf16src, dst, Precision::BF16, Precision::FP32, weights_blob->size()); +MKLDNNNormalizeL2Node::MKLDNNNormalizeL2Node(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache), src_data_size(0lu), dst_data_size(0lu), input_prec(Precision::UNSPECIFIED), output_prec(Precision::UNSPECIFIED) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "NormalizeL2 node with name '" + getName() + "' "; + const auto norm = std::dynamic_pointer_cast(op); + eps = norm->get_eps(); + epsMode = norm->get_eps_mode() == ngraph::op::EpsMode::MAX ? 
NormEpsMode::MAX : NormEpsMode::ADD; + across_spatial = ngraph::shape_size(op->get_input_shape(AXES)) != 1; + // One of the corner cases is when axes is an empty list, + // then we divide each input element by itself resulting value 1 for all non-zero elements + cornerCase = ngraph::shape_size(op->get_input_shape(AXES)) == 0; + } else { + IE_THROW(NotImplemented) << errorMessage; } } -void MKLDNNNormalizeNode::initSupportedPrimitiveDescriptors() { +bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto norm = std::dynamic_pointer_cast(op); + if (!norm) { + errorMessage = "Only opset1 NormalizeL2 operation is supported"; + return false; + } + const auto dataDims = norm->get_input_shape(DATA); + if (dataDims.size() < 2 && dataDims.size() > 4) { + errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(dataDims.size()); + return false; + } + const auto axesNode = std::dynamic_pointer_cast(norm->get_input_node_shared_ptr(AXES)); + if (!axesNode) { + errorMessage = "Supports only constant 'axes' input"; + return false; + } + + const auto isSupportedAxes = [](const std::vector &axes, const ngraph::Shape &dataDims) { + if (axes.size() == 1 && axes[0] == 1) { + return true; + } else if (axes.size() == dataDims.size() - 1) { + for (size_t i = 0; i < axes.size(); i++) { + if (axes[i] != i + 1) + return false; + } + return true; + } + return false; + }; + const auto axes = axesNode->cast_vector(); + if (!isSupportedAxes(axes, dataDims) && ngraph::shape_size(axesNode->get_shape()) != 0) { + errorMessage = "Doesn't support reduction axes: " + vec2str(axes); + return false; + } + const auto mode = norm->get_eps_mode(); + if (mode != ngraph::op::EpsMode::ADD && mode != ngraph::op::EpsMode::MAX) { + errorMessage = "Doesn't support eps_mode: " + ngraph::as_string(mode); + return false; + } + } catch (...) { + return false; + } + return true; +} + +void MKLDNNNormalizeL2Node::getSupportedDescriptors() { + if (!descs.empty()) + return; + + if (getParentEdges().size() != 2) + IE_THROW() << errorPrefix << " has incorrect number of input edges: " << getParentEdges().size(); + if (getChildEdges().empty()) + IE_THROW() << errorPrefix << " has incorrect number of output edges: " << getChildEdges().size(); + + if (getParentEdgeAt(0)->getDims().ndims() > 4 || getParentEdgeAt(0)->getDims().ndims() < 2) { + IE_THROW() << errorPrefix << "has invalid input shape. 
Normalize supports from 2D to 4D blobs."; + } +} + +void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; setPostOps(attr, true); - Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); - Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision(); + Precision inputPrecision = getOriginalInputPrecisionAtPort(DATA); + Precision outputPrecision = getOriginalOutputPrecisionAtPort(DATA); if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } + outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } if (inputPrecision == Precision::BF16 || outputPrecision == Precision::BF16) { @@ -811,53 +740,38 @@ void MKLDNNNormalizeNode::initSupportedPrimitiveDescriptors() { inputPrecision = outputPrecision = Precision::BF16; } - auto isOneOf = [&](InferenceEngine::Precision precision, std::vector precisions) { - for (auto p : precisions) { - if (precision == p) { - return true; - } - } - return false; - }; - if (!isOneOf(inputPrecision, {Precision::FP32, Precision::BF16, Precision::I8, Precision::U8})) { - IE_THROW() << "Unsupported input precision. " << getName(); + if (!one_of(inputPrecision, Precision::FP32, Precision::BF16, Precision::I8, Precision::U8)) { + IE_THROW() << errorPrefix << "has unsupported input precision. " << getName(); } - if (!isOneOf(outputPrecision, {Precision::FP32, Precision::BF16, Precision::I8, Precision::U8})) { - IE_THROW() << "Unsupported output precision. " << getName(); - } - if (!isOneOf(weights_prec, {Precision::FP32, Precision::BF16})) { - IE_THROW() << "Unsupported wights precision. " << getName(); + if (!one_of(outputPrecision, Precision::FP32, Precision::BF16, Precision::I8, Precision::U8)) { + IE_THROW() << errorPrefix << "has unsupported output precision. " << getName(); } auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); - auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(weights_prec); input_prec = inputPrecision; output_prec = outputPrecision; src_data_size = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); - weights_data_size = MKLDNNExtensionUtils::sizeOfDataType(weightsDataType); - bool canBeInplace = src_data_size == dst_data_size && getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1; + bool canBeInplace = src_data_size == dst_data_size && getParentEdgeAt(DATA)->getParent()->getChildEdges().size() == 1; - InferenceEngine::LayerConfig config; + LayerConfig config; config.dynBatchSupport = false; - config.inConfs.resize(1); + config.inConfs.resize(2); config.outConfs.resize(1); - config.inConfs[0].constant = false; - config.outConfs[0].constant = false; - config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 
0 : -1; auto pushDesc = [&](memory::format_tag format) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, format); - config.outConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), outputDataType, format); + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA)->getDims(), inputDataType, format); + config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES)->getDims(), memory::data_type::s32, memory::format_tag::x); + config.outConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA)->getDims(), outputDataType, format); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, format}); }; // only plain layout support when w/o sse42 - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(DATA)->getDims().ndims() == 4 && !cornerCase) { if (mayiuse(cpu::x64::sse41)) { pushDesc(memory::format_tag::nhwc); if (mayiuse(cpu::x64::avx512_common)) { @@ -869,16 +783,20 @@ void MKLDNNNormalizeNode::initSupportedPrimitiveDescriptors() { } if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims())); + pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(DATA)->getDims())); } -void MKLDNNNormalizeNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { +bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const { + return !cornerCase && canFuseSimpleOperation(node); +} + +void MKLDNNNormalizeL2Node::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { mkldnn::post_ops ops; for (auto &node : fusedWith) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode) { - quantizeNode->appendPostOps(ops); + auto* fakeQuantizeNode = dynamic_cast(node.get()); + if (fakeQuantizeNode) { + fakeQuantizeNode->appendPostOps(ops); continue; } @@ -894,65 +812,66 @@ void MKLDNNNormalizeNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeig attr.set_post_ops(ops); } -void MKLDNNNormalizeNode::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); +void MKLDNNNormalizeL2Node::createPrimitive() { + auto& dstMemPtr = getChildEdgeAt(DATA)->getMemoryPtr(); + auto& srcMemPtr = getParentEdgeAt(DATA)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) - IE_THROW() << "Destination memory didn't allocate."; + IE_THROW() << errorPrefix << "can't get destination memory"; if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) - IE_THROW() << "Input memory didn't allocate."; + IE_THROW() << errorPrefix << "can't get input memory"; if (getSelectedPrimitiveDescriptor() == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; + IE_THROW() << errorPrefix << "has nullable preferable primitive descriptor"; - auto selectedPD = getSelectedPrimitiveDescriptor(); - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); - jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); - jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); + if (!cornerCase) { + auto selectedPD = getSelectedPrimitiveDescriptor(); + jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + 
jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); + jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); - jcp.is_nchw = jcp.is_nhwc = jcp.is_blk = false; - if (getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { - jcp.is_nchw = true; - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { - jcp.is_blk = true; - } else { - jcp.is_nhwc = true; - } + jcp.is_nchw = jcp.is_nhwc = jcp.is_blk = false; + if (getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { + jcp.is_nchw = true; + } else if (getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + jcp.is_blk = true; + } else { + jcp.is_nhwc = true; + } - jcp.across_spatial = across_spatial; - jcp.channel_shared = channel_shared; - auto dims = getParentEdgeAt(0)->getDesc().getDims(); - size_t dims_size = dims.size(); - jcp.n = (dims_size > 0) ? dims[0] : 1lu; - jcp.c = (dims_size > 1) ? dims[1] : 1lu; - jcp.h = (dims_size > 2) ? dims[2] : 1lu; - jcp.w = (dims_size > 3) ? dims[3] : 1lu; + jcp.across_spatial = across_spatial; + auto dims = getParentEdgeAt(0)->getDesc().getDims(); + size_t dims_size = dims.size(); + jcp.n = (dims_size > 0) ? dims[0] : 1lu; + jcp.c = (dims_size > 1) ? dims[1] : 1lu; + jcp.h = (dims_size > 2) ? dims[2] : 1lu; + jcp.w = (dims_size > 3) ? dims[3] : 1lu; - if (mayiuse(cpu::x64::avx512_common)) { - normalize_modulo_kernel.reset(new jit_uni_normalize_modulo_kernel_f32(jcp)); - normalize_kernel.reset(new jit_uni_normalize_kernel_f32(jcp, *attr.get())); - } else if (mayiuse(cpu::x64::avx2)) { - normalize_modulo_kernel.reset(new jit_uni_normalize_modulo_kernel_f32(jcp)); - normalize_kernel.reset(new jit_uni_normalize_kernel_f32(jcp, *attr.get())); - } else if (mayiuse(cpu::x64::sse41)) { - normalize_modulo_kernel.reset(new jit_uni_normalize_modulo_kernel_f32(jcp)); - normalize_kernel.reset(new jit_uni_normalize_kernel_f32(jcp, *attr.get())); - } - if (normalize_kernel) - normalize_kernel->create_ker(); + if (mayiuse(cpu::x64::avx512_common)) { + normalize_modulo_kernel.reset(new jit_uni_normalize_modulo_kernel_f32(jcp)); + normalize_kernel.reset(new jit_uni_normalize_kernel_f32(jcp, *attr.get())); + } else if (mayiuse(cpu::x64::avx2)) { + normalize_modulo_kernel.reset(new jit_uni_normalize_modulo_kernel_f32(jcp)); + normalize_kernel.reset(new jit_uni_normalize_kernel_f32(jcp, *attr.get())); + } else if (mayiuse(cpu::x64::sse41)) { + normalize_modulo_kernel.reset(new jit_uni_normalize_modulo_kernel_f32(jcp)); + normalize_kernel.reset(new jit_uni_normalize_kernel_f32(jcp, *attr.get())); + } + if (normalize_kernel) + normalize_kernel->create_ker(); - if (normalize_modulo_kernel) - normalize_modulo_kernel->create_ker(); + if (normalize_modulo_kernel) + normalize_modulo_kernel->create_ker(); - const auto &p = (*attr.get()).post_ops_; - for (int i = 0; i < p.len(); i++) { - auto &post_op = p.entry_[i]; - if (post_op.is_eltwise()) { - eltwise_injectors_ref.push_back(std::make_shared( - post_op.eltwise.alg, post_op.eltwise.alpha, post_op.eltwise.beta, post_op.eltwise.scale)); - } else if (post_op.is_depthwise()) { - depthwise_injectors_ref.push_back(std::make_shared( - post_op.depthwise.alg)); + const auto &p = (*attr.get()).post_ops_; + for (int i = 0; i < p.len(); i++) { + auto &post_op = p.entry_[i]; + if (post_op.is_eltwise()) { + eltwise_injectors_ref.push_back(std::make_shared( + post_op.eltwise.alg, post_op.eltwise.alpha, post_op.eltwise.beta, post_op.eltwise.scale)); + } else if (post_op.is_depthwise()) { + 
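                    // The reference (non-JIT) path keeps its own eltwise/depthwise injectors so that
                    // fused post-ops can still be applied per output element via apply_post_ops_scalar().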
depthwise_injectors_ref.push_back(std::make_shared( + post_op.depthwise.alg)); + } } } } @@ -960,16 +879,16 @@ void MKLDNNNormalizeNode::createPrimitive() { namespace { struct NormalizeContext { - MKLDNNNormalizeNode &node; + MKLDNNNormalizeL2Node &node; const uint8_t *src; uint8_t *dst; - const InferenceEngine::SizeVector& dims; + const SizeVector& dims; }; } // namespace template -struct MKLDNNNormalizeNode::NormalizeExecute { +struct MKLDNNNormalizeL2Node::NormalizeExecute { using src_t = typename std::tuple_element<0, T>::type; using dst_t = typename std::tuple_element<1, T>::type; @@ -980,13 +899,13 @@ struct MKLDNNNormalizeNode::NormalizeExecute { } }; -void MKLDNNNormalizeNode::execute(mkldnn::stream strm) { - auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); +void MKLDNNNormalizeL2Node::execute(mkldnn::stream strm) { + auto &srcMemPtr = getParentEdgeAt(DATA)->getMemoryPtr(); + auto &dstMemPtr = getChildEdgeAt(DATA)->getMemoryPtr(); const uint8_t *src_ptr = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t *dst_ptr = reinterpret_cast(dstMemPtr->GetPtr()); - auto dims = getParentEdgeAt(0)->getDesc().getDims(); + auto dims = getParentEdgeAt(DATA)->getDesc().getDims(); NormalizeContext ctx = { *this, @@ -1009,7 +928,7 @@ void MKLDNNNormalizeNode::execute(mkldnn::stream strm) { } template -void MKLDNNNormalizeNode::normalize_nchw(const in_data_t* src_data, out_data_t* dst_data, const InferenceEngine::SizeVector& dims) { +void MKLDNNNormalizeL2Node::normalize_nchw(const in_data_t* src_data, out_data_t* dst_data, const SizeVector& dims) { size_t blk_size = 1; // elt in vmm if (mayiuse(cpu::x64::avx512_common)) { blk_size = 16; @@ -1024,7 +943,6 @@ void MKLDNNNormalizeNode::normalize_nchw(const in_data_t* src_data, out_data_t* size_t H = (dims_size > 2) ? dims[2] : 1lu; size_t C = (dims_size > 1) ? dims[1] : 1lu; size_t B = (dims_size > 0) ? dims[0] : 1lu; - float *weights = weights_blob->buffer().as(); for (size_t b = 0lu; b < B; b++) { const in_data_t *src_data_b = src_data + b * C * H * W; @@ -1056,17 +974,16 @@ void MKLDNNNormalizeNode::normalize_nchw(const in_data_t* src_data, out_data_t* }); modulo = std::sqrt(modulo); - float modulo_inv = 1.0f / (modulo + eps); + float modulo_inv = 1.0f / (epsApply(modulo)); // normalize parallel_for(C, [&](size_t ic) { const in_data_t *src_data_bc = src_data_b + ic * H * W; out_data_t *dst_data_bc = dst_data_b + ic * H * W; - float fused_weight_modulo = channel_shared ? 
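// across_spatial == true: one L2 norm is taken over the whole C*H*W slab of a batch item,
// modulo = sqrt(sum of x^2), and every element is scaled by 1.0f / epsApply(modulo).
// With NormalizeL2 the former per-channel weights are gone, so the broadcast fused_factor
// is just the inverse norm itself.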
(weights[0] * modulo_inv) : (weights[ic] * modulo_inv); auto arg = jit_normalize_call_args(); arg.src = src_data_bc; arg.dst = dst_data_bc; - arg.fused_factor = static_cast(&fused_weight_modulo); // broadcast once + arg.fused_factor = static_cast(&modulo_inv); // broadcast once arg.oc_off = ic * sizeof(float); arg.work_amount = static_cast(W * H); (*normalize_kernel)(&arg); @@ -1096,9 +1013,7 @@ void MKLDNNNormalizeNode::normalize_nchw(const in_data_t* src_data, out_data_t* }); for (size_t m = 0; m < H * W; m++) { - moduloM[m] = 1.0f / (std::sqrt(moduloM[m]) + eps); - if (channel_shared) - moduloM[m] = moduloM[m] * weights[0]; + moduloM[m] = 1.0f / (std::sqrt(epsApply(moduloM[m]))); } // normalize @@ -1108,12 +1023,7 @@ void MKLDNNNormalizeNode::normalize_nchw(const in_data_t* src_data, out_data_t* auto arg = jit_normalize_call_args(); arg.src = src_data_bc; arg.dst = dst_data_bc; - if (channel_shared) { - arg.fused_factor = static_cast(&moduloM[0]); // ld dynamic - } else { - arg.modulo = static_cast(&moduloM[0]); // ld dynamic - arg.weights = static_cast(&weights[ic]); // bc once - } + arg.fused_factor = static_cast(&moduloM[0]); // ld dynamic arg.oc_off = ic * sizeof(float); arg.work_amount = static_cast(W * H); (*normalize_kernel)(&arg); @@ -1123,13 +1033,12 @@ void MKLDNNNormalizeNode::normalize_nchw(const in_data_t* src_data, out_data_t* } template -void MKLDNNNormalizeNode::normalize_nchw_ref(const in_data_t* src_data, out_data_t* dst_data, const InferenceEngine::SizeVector& dims) { +void MKLDNNNormalizeL2Node::normalize_nchw_ref(const in_data_t* src_data, out_data_t* dst_data, const SizeVector& dims) { size_t dims_size = dims.size(); size_t W = (dims_size > 3) ? dims[3] : 1lu; size_t H = (dims_size > 2) ? dims[2] : 1lu; size_t C = (dims_size > 1) ? dims[1] : 1lu; size_t B = (dims_size > 0) ? dims[0] : 1lu; - float *weights = weights_blob->buffer().as(); for (size_t b = 0lu; b < B; b++) { const in_data_t *src_data_b = src_data + b * C * H * W; @@ -1148,15 +1057,14 @@ void MKLDNNNormalizeNode::normalize_nchw_ref(const in_data_t* src_data, out_data }); modulo = std::sqrt(modulo); - float modulo_inv = 1.0f / (modulo + eps); + float modulo_inv = 1.0f / (epsApply(modulo)); // normalize parallel_for(C, [&](size_t ic) { const in_data_t *src_data_bc = src_data_b + ic * H * W; out_data_t *dst_data_bc = dst_data_b + ic * H * W; - float fused_weight_modulo = channel_shared ? (weights[0] * modulo_inv) : (weights[ic] * modulo_inv); for (size_t m = 0; m < W * H; m++) { - float dst_value = src_data_bc[m] * fused_weight_modulo; + float dst_value = src_data_bc[m] * modulo_inv; apply_post_ops_scalar(dst_value, ic); if (output_prec == Precision::U8) { dst_data_bc[m] = (dst_value >= 0) ? dst_value : 0; @@ -1180,9 +1088,7 @@ void MKLDNNNormalizeNode::normalize_nchw_ref(const in_data_t* src_data, out_data }); for (size_t m = 0; m < H * W; m++) { - moduloM[m] = 1.0f / (std::sqrt(moduloM[m]) + eps); - if (channel_shared) - moduloM[m] = moduloM[m] * weights[0]; + moduloM[m] = 1.0f / (std::sqrt(epsApply(moduloM[m]))); } // normalize @@ -1190,8 +1096,7 @@ void MKLDNNNormalizeNode::normalize_nchw_ref(const in_data_t* src_data, out_data const in_data_t *src_data_bc = src_data_b + ic * H * W; out_data_t *dst_data_bc = dst_data_b + ic * H * W; for (size_t m = 0; m < W * H; m++) { - float dst_value = channel_shared ? 
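// across_spatial == false: the norm is taken per spatial position instead; moduloM[m]
// ends up holding 1.0f / sqrt(epsApply(sum over channels of x[c][m]^2)), and every
// channel value at position m is multiplied by that same per-pixel factor.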
src_data_bc[m] * moduloM[m] : - src_data_bc[m] * moduloM[m] * weights[ic]; + float dst_value = src_data_bc[m] * moduloM[m]; apply_post_ops_scalar(dst_value, ic); if (output_prec == Precision::U8) { dst_data_bc[m] = (dst_value >= 0) ? dst_value : 0; @@ -1205,7 +1110,7 @@ void MKLDNNNormalizeNode::normalize_nchw_ref(const in_data_t* src_data, out_data } template -void MKLDNNNormalizeNode::normalize_nhwc(const in_data_t* src_data, out_data_t* dst_data, const InferenceEngine::SizeVector& dims) { +void MKLDNNNormalizeL2Node::normalize_nhwc(const in_data_t* src_data, out_data_t* dst_data, const SizeVector& dims) { size_t blk_size = 1; // elt in vmm if (mayiuse(cpu::x64::avx512_common)) { blk_size = 16; @@ -1220,7 +1125,6 @@ void MKLDNNNormalizeNode::normalize_nhwc(const in_data_t* src_data, out_data_t* size_t H = (dims_size > 2) ? dims[2] : 1lu; size_t C = (dims_size > 1) ? dims[1] : 1lu; size_t B = (dims_size > 0) ? dims[0] : 1lu; - float *weights = weights_blob->buffer().as(); for (size_t b = 0lu; b < B; b++) { const in_data_t *src_data_b = src_data + b * C * H * W; @@ -1251,39 +1155,20 @@ void MKLDNNNormalizeNode::normalize_nhwc(const in_data_t* src_data, out_data_t* return modulo_kernel + modulo_tail; }); modulo = std::sqrt(modulo); - float modulo_inv = 1.0f / (modulo + eps); + float modulo_inv = 1.0f / (epsApply(modulo)); // normalize - if (channel_shared) { - float fused_weight_modulo = weights[0] * modulo_inv; - parallel_for2d(H, W, [&](int ih, int iw) { - const in_data_t *src_data_bhw = src_data_b + ih * C * W + iw * C; - out_data_t *dst_data_bhw = dst_data_b + ih * C * W + iw * C; - auto arg = jit_normalize_call_args(); - arg.src = src_data_bhw; - arg.dst = dst_data_bhw; - arg.fused_factor = static_cast(&fused_weight_modulo); // bc static - arg.oc_off = 0; - arg.work_amount = static_cast(C); - (*normalize_kernel)(&arg); - }); - } else { // channel_shared=false - std::vector fused_weight_modulo(C); - for (size_t c = 0; c < C; c++) { - fused_weight_modulo[c] = weights[c] * modulo_inv; - } - parallel_for2d(H, W, [&](int ih, int iw) { - const in_data_t *src_data_bhw = src_data_b + ih * C * W + iw * C; - out_data_t *dst_data_bhw = dst_data_b + ih * C * W + iw * C; - auto arg = jit_normalize_call_args(); - arg.src = src_data_bhw; - arg.dst = dst_data_bhw; - arg.fused_factor = static_cast(&fused_weight_modulo[0]); // ld dynamic - arg.oc_off = 0; - arg.work_amount = static_cast(C); - (*normalize_kernel)(&arg); - }); - } + parallel_for2d(H, W, [&](int ih, int iw) { + const in_data_t *src_data_bhw = src_data_b + ih * C * W + iw * C; + out_data_t *dst_data_bhw = dst_data_b + ih * C * W + iw * C; + auto arg = jit_normalize_call_args(); + arg.src = src_data_bhw; + arg.dst = dst_data_bhw; + arg.fused_factor = static_cast(&modulo_inv); // bc static + arg.oc_off = 0; + arg.work_amount = static_cast(C); + (*normalize_kernel)(&arg); + }); } else { // for across_spatial=false parallel_for2d(H, W, [&](int ih, int iw) { // modulo @@ -1305,18 +1190,11 @@ void MKLDNNNormalizeNode::normalize_nhwc(const in_data_t* src_data, out_data_t* } modulo = std::sqrt(modulo); - float modulo_inv = 1.0f / (modulo + eps); + float modulo_inv = 1.0f / (epsApply(modulo)); // normalize arg.dst = dst_data_bhw; - float fused_weight_modulo = 0; - if (channel_shared) { - fused_weight_modulo = modulo_inv * weights[0]; - arg.fused_factor = static_cast(&fused_weight_modulo); // bc static - } else { - arg.modulo = static_cast(&modulo_inv); // bc static - arg.weights = static_cast(&weights[0]); // ld dynamic - } + arg.fused_factor = 
static_cast(&modulo_inv); // bc static arg.work_amount = C; arg.oc_off = 0; (*normalize_kernel)(&arg); @@ -1326,7 +1204,7 @@ void MKLDNNNormalizeNode::normalize_nhwc(const in_data_t* src_data, out_data_t* } template -void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* dst_data, const InferenceEngine::SizeVector& dims) { +void MKLDNNNormalizeL2Node::normalize_blk(const in_data_t* src_data, out_data_t* dst_data, const SizeVector& dims) { size_t blk_size = 1; // channel blk for memory layout if (mayiuse(cpu::x64::avx512_common)) { blk_size = 16; @@ -1341,17 +1219,9 @@ void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d size_t H = (dims_size > 2) ? dims[2] : 1lu; size_t C = (dims_size > 1) ? dims[1] : 1lu; size_t B = (dims_size > 0) ? dims[0] : 1lu; - float *weights = weights_blob->buffer().as(); size_t CB = div_up(C, blk_size); - // normalize for tails: data is padding, norm weight is padding, so tails as vector for normalize; - // post ops for tails: post-ops params is padding. - std::vector weights_padding(CB * blk_size); - if (!channel_shared) { - cpu_memcpy(static_cast(&weights_padding[0]), weights, C * sizeof(float)); - } - for (size_t b = 0lu; b < B; b++) { const in_data_t *src_data_b = src_data + b * CB * H * W * blk_size; out_data_t *dst_data_b = dst_data + b * CB * H * W * blk_size; @@ -1383,39 +1253,20 @@ void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d }); modulo = std::sqrt(modulo); - float modulo_inv = 1.0f / (modulo + eps); + float modulo_inv = 1.0f / (epsApply(modulo)); // normalize - if (channel_shared) { - float fused_weight_modulo = weights[0] * modulo_inv; - parallel_for2d(CB, H, [&](size_t cb, size_t h) { - const in_data_t *src_data_b_cb_h = src_data_b + cb * H * W * blk_size + h * W * blk_size; - out_data_t *dst_data_b_cb_h = dst_data_b + cb * H * W * blk_size + h * W * blk_size; - auto arg = jit_normalize_call_args(); - arg.src = src_data_b_cb_h; - arg.dst = dst_data_b_cb_h; - arg.fused_factor = static_cast(&fused_weight_modulo); // broadcast once - arg.work_amount = static_cast(W); - arg.oc_off = cb * blk_size * sizeof(float); - (*normalize_kernel)(&arg); - }); - } else { - std::vector fused_weight_modulo(weights_padding.size(), 0); - for (size_t c = 0; c < C; c++) { - fused_weight_modulo[c] = weights_padding[c] * modulo_inv; - } - parallel_for2d(CB, H, [&](size_t cb, size_t h) { - const in_data_t *src_data_b_cb_h = src_data_b + cb * H * W * blk_size + h * W * blk_size; - out_data_t *dst_data_b_cb_h = dst_data_b + cb * H * W * blk_size + h * W * blk_size; - auto arg = jit_normalize_call_args(); - arg.src = src_data_b_cb_h; - arg.dst = dst_data_b_cb_h; - arg.fused_factor = static_cast(&fused_weight_modulo[cb * blk_size]); // load once - arg.work_amount = static_cast(W); - arg.oc_off = cb * blk_size * sizeof(float); - (*normalize_kernel)(&arg); - }); - } + parallel_for2d(CB, H, [&](size_t cb, size_t h) { + const in_data_t *src_data_b_cb_h = src_data_b + cb * H * W * blk_size + h * W * blk_size; + out_data_t *dst_data_b_cb_h = dst_data_b + cb * H * W * blk_size + h * W * blk_size; + auto arg = jit_normalize_call_args(); + arg.src = src_data_b_cb_h; + arg.dst = dst_data_b_cb_h; + arg.fused_factor = static_cast(&modulo_inv); // broadcast once + arg.work_amount = static_cast(W); + arg.oc_off = cb * blk_size * sizeof(float); + (*normalize_kernel)(&arg); + }); } else { // across_spatial: false parallel_for2d(H, W, [&](size_t ih, size_t iw) { // modulo @@ -1439,18 +1290,11 @@ void 
MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d } modulo = std::sqrt(modulo); - float modulo_inv = 1.0f / (modulo + eps); + float modulo_inv = 1.0f / (epsApply(modulo)); // normalize arg.dst = dst_data_bhw; - float fused_weight_modulo = 0; - if (channel_shared) { - fused_weight_modulo = weights[0] * modulo_inv; - arg.fused_factor = static_cast(&fused_weight_modulo); // broadcast - } else { - arg.weights = static_cast(&weights_padding[0]); // load - arg.modulo = static_cast(&modulo_inv); // broadcast - } + arg.fused_factor = static_cast(&modulo_inv); // broadcast arg.work_amount = CB; arg.oc_off = 0; (*normalize_kernel)(&arg); @@ -1460,8 +1304,13 @@ void MKLDNNNormalizeNode::normalize_blk(const in_data_t* src_data, out_data_t* d } template -void MKLDNNNormalizeNode::normalize_function(const in_data_t* src_data, out_data_t* dst_data, const InferenceEngine::SizeVector& dims) { - if (mayiuse(cpu::x64::sse41) && normalize_modulo_kernel && normalize_kernel) { +void MKLDNNNormalizeL2Node::normalize_function(const in_data_t* src_data, out_data_t* dst_data, const SizeVector& dims) { + if (cornerCase) { + const auto workAmount = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); + parallel_for(workAmount, [&](size_t i) { + dst_data[i] = src_data[i] == 0 ? 0 : 1; + }); + } else if (mayiuse(cpu::x64::sse41) && normalize_modulo_kernel && normalize_kernel) { if (jcp.is_nchw) { normalize_nchw(src_data, dst_data, dims); } else if (jcp.is_nhwc) { @@ -1469,18 +1318,18 @@ void MKLDNNNormalizeNode::normalize_function(const in_data_t* src_data, out_data } else if (jcp.is_blk) { normalize_blk(src_data, dst_data, dims); } else { - IE_THROW() << "The selected layout is not supported."; + IE_THROW() << errorPrefix << "has selected layout which is not supported."; } } else { if (jcp.is_nchw) { normalize_nchw_ref(src_data, dst_data, dims); } else { - IE_THROW() << "Only support plain layout on machine w/o sse42."; + IE_THROW() << errorPrefix << "supports only plain layout on machine w/o sse42."; } } } -inline void MKLDNNNormalizeNode::apply_post_ops_scalar(float &dst_value, int index_c) { +inline void MKLDNNNormalizeL2Node::apply_post_ops_scalar(float &dst_value, int index_c) { const auto &p = (*attr.get()).post_ops_; int eltwise_inj_idx = 0; int depthwise_inj_idx = 0; @@ -1521,8 +1370,8 @@ inline void MKLDNNNormalizeNode::apply_post_ops_scalar(float &dst_value, int ind } } -bool MKLDNNNormalizeNode::created() const { - return getType() == Normalize; +bool MKLDNNNormalizeL2Node::created() const { + return getType() == NormalizeL2; } -REG_MKLDNN_PRIM_FOR(MKLDNNNormalizeNode, Normalize); +REG_MKLDNN_PRIM_FOR(MKLDNNNormalizeL2Node, NormalizeL2); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h index fae0bd915da..7e99d063a3f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h @@ -20,7 +20,6 @@ struct jit_normalize_config_params { bool is_nhwc; bool is_blk; bool across_spatial; - bool channel_shared; mkldnn::memory::data_type src_dt; mkldnn::memory::data_type dst_dt; int src_data_size; @@ -31,7 +30,6 @@ struct jit_normalize_config_params { struct jit_normalize_call_args { const void *src; void *dst; - const float *weights; const float *modulo; const float *fused_factor; size_t src_stride; @@ -73,10 +71,10 @@ struct jit_uni_normalize_kernel { const mkldnn_primitive_attr &attr_; }; -class 
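// Header rework: channel_shared and the weights pointer disappear from the JIT config and
// call args because opset NormalizeL2 carries no per-channel scale; the kernels are driven
// only by the precomputed modulo / fused_factor values. Epsilon handling moves into
// epsApply(): NormEpsMode::ADD returns modulo + eps, NormEpsMode::MAX returns
// std::max(modulo, eps), and either choice keeps 1.0f / epsApply(modulo) finite.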
MKLDNNNormalizeNode : public MKLDNNNode { +class MKLDNNNormalizeL2Node : public MKLDNNNode { public: - MKLDNNNormalizeNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNNormalizeNode() override = default; + MKLDNNNormalizeL2Node(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNNormalizeL2Node() override = default; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -87,7 +85,28 @@ public: return false; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + bool canFuse(const MKLDNNNodePtr& node) const override; + private: + enum class NormEpsMode { + ADD, + MAX + }; + NormEpsMode epsMode = NormEpsMode::ADD; + + float epsApply(const float &modulo) const { + if (epsMode == NormEpsMode::ADD) { + return modulo + eps; + } else if (epsMode == NormEpsMode::MAX) { + return std::max(modulo, eps); + } else { + IE_THROW() << errorPrefix << "has unsupported epsilon mode"; + } + } + + bool cornerCase = false; + template struct NormalizeExecute; @@ -109,13 +128,11 @@ private: template void normalize_function(const in_data_t* src_data, out_data_t* dst_data, const InferenceEngine::SizeVector& dims); - MemoryBlob::Ptr weights_blob; bool across_spatial = true; - bool channel_shared = true; float eps = 1e-10f; - InferenceEngine::Precision input_prec, output_prec, weights_prec; - size_t src_data_size, dst_data_size, weights_data_size; + InferenceEngine::Precision input_prec, output_prec; + size_t src_data_size, dst_data_size; mkldnn::primitive_attr attr; @@ -128,6 +145,11 @@ private: std::vector> depthwise_injectors_ref; jit_normalize_config_params jcp = {}; + + static const size_t DATA = 0; + static const size_t AXES = 1; + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp new file mode 100644 index 00000000000..8f164c33c18 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp @@ -0,0 +1,147 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "utils/bfloat16.hpp" +#include +#include "mkldnn_one_hot_node.h" +#include +#include +#include "common/cpu_memcpy.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNOneHotNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto oneHot = std::dynamic_pointer_cast(op); + if (!oneHot) { + errorMessage = "Only opset1 OneHot operation is supported"; + return false; + } + if (std::dynamic_pointer_cast(oneHot->get_input_node_shared_ptr(DEPTH_ID)) == nullptr) { + errorMessage = "Only const 'depth' input is supported"; + return false; + } + if (std::dynamic_pointer_cast(oneHot->get_input_node_shared_ptr(ON_VALUE_ID)) == nullptr) { + errorMessage = "Only const 'on_value' input is supported"; + return false; + } + if (std::dynamic_pointer_cast(oneHot->get_input_node_shared_ptr(OFF_VALUEAXES_ID)) == nullptr) { + errorMessage = "Only const 'off_value' input is supported"; + return false; + } + } catch (...) 
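// OneHot is accepted only when 'depth', 'on_value' and 'off_value' are Constant nodes:
// depth is cached in the constructor, while on_value / off_value are still read from their
// input edges at execution time; only the 'indices' input carries real runtime data.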
{ + return false; + } + return true; +} + +MKLDNNOneHotNode::MKLDNNOneHotNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "OneHot layer with name '" + op->get_friendly_name() + "'"; + const auto oneHot = std::dynamic_pointer_cast(op); + const auto depthNode = std::dynamic_pointer_cast(oneHot->get_input_node_shared_ptr(DEPTH_ID)); + const auto onValueNode = std::dynamic_pointer_cast(oneHot->get_input_node_shared_ptr(ON_VALUE_ID)); + const auto offValueNode = std::dynamic_pointer_cast(oneHot->get_input_node_shared_ptr(OFF_VALUEAXES_ID)); + depth = depthNode->cast_vector()[0]; + axis = oneHot->get_axis(); + src_dims = oneHot->get_input_shape(INDICES_ID); + if (ngraph::is_scalar(src_dims)) { + src_dims = SizeVector{1}; + } + dst_dims = oneHot->get_output_shape(0); + if (ngraph::is_scalar(dst_dims)) { + dst_dims = SizeVector{1}; + } + + int output_dims_size = dst_dims.size(); + if (axis < 0) { + axis += output_dims_size; + } + if (axis < 0 || axis >= output_dims_size) { + IE_THROW() << errorPrefix << " has unsupported 'axis' attribute: " << oneHot->get_axis(); + } + + if (!( ((1 + src_dims.size()) == dst_dims.size()) || + (src_dims.size() == 1 && dst_dims.size() == 1 && dst_dims[0] == depth && src_dims[0] == 1))) + IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!"; +} + +void MKLDNNOneHotNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + // check a precision of the input tensor + auto input_precision = getOriginalInputPrecisionAtPort(INDICES_ID); + if (input_precision != Precision::I32) { + IE_THROW() << errorPrefix << " has incorrect input precision for the input. 
Only I32 is supported!"; + } + output_precision = getOriginalOutputPrecisionAtPort(0); + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_precision}, + {TensorDescCreatorTypes::ncsp, input_precision}, + {TensorDescCreatorTypes::ncsp, output_precision}, + {TensorDescCreatorTypes::ncsp, output_precision}}, + {{TensorDescCreatorTypes::ncsp, output_precision}}, + impl_desc_type::ref_any); +} + +template +void MKLDNNOneHotNode::one_hot(size_t prefix_size, size_t suffix_size) { + const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + const out_type on_value = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr())[0]; + const out_type off_value = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr())[0]; + + // fill the output with off_value + std::size_t dst_size = prefix_size * depth * suffix_size; + std::fill(dst_data, dst_data + dst_size, off_value); + + // set on_value at needed locations + auto on_val = on_value; + parallel_for(prefix_size, [&](std::size_t prefix_idx) { + const in_type* src_dataPtr = &src_data[prefix_idx * suffix_size]; + out_type* dst_dataPtr = &dst_data[prefix_idx * depth * suffix_size]; + for (std::size_t suffix_idx = 0; suffix_idx < suffix_size; ++suffix_idx, ++src_dataPtr, ++dst_dataPtr) { + auto v = static_cast(*src_dataPtr); + if (v < depth) { + dst_dataPtr[v * suffix_size] = on_val; + } + } + }); +} + +void MKLDNNOneHotNode::execute(mkldnn::stream strm) { + std::size_t prefix_size = 1; + auto input_dims = getParentEdgeAt(0)->getDesc().getDims(); + + std::size_t actual_axis = (axis == -1) ? src_dims.size() : axis; + for (size_t i = 0; i < actual_axis; ++i) + prefix_size *= input_dims[i]; + + std::size_t suffix_size = getParentEdgeAt(0)->getBlob()->size() / prefix_size; + + OneHotContext ctx = {this, prefix_size, suffix_size}; + OV_SWITCH(MKLDNNPlugin, OneHotExecute, ctx, output_precision.size(), + OV_CASE(sizeof(uint32_t), uint32_t), + OV_CASE(sizeof(uint16_t), uint16_t), + OV_CASE(sizeof(uint8_t), uint8_t)) +} + +bool MKLDNNOneHotNode::created() const { + return getType() == OneHot; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNOneHotNode, OneHot) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.h new file mode 100644 index 00000000000..ee355ce26a3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.h @@ -0,0 +1,63 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNOneHotNode : public MKLDNNNode { +public: + MKLDNNOneHotNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNOneHotNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + typedef InferenceEngine::PrecisionTrait::value_type in_type; + + struct OneHotContext { + MKLDNNOneHotNode* nodePtr; + size_t prefix_size; + size_t suffix_size; + }; + + template + struct OneHotExecute { + void operator()(OneHotContext & ctx) { + ctx.nodePtr->one_hot(ctx.prefix_size, 
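// one_hot() views the output as [prefix_size, depth, suffix_size]: prefix_size is the
// product of the dimensions before the one-hot axis and suffix_size the product of those
// after it. The buffer is first filled with off_value, then for every index v read from
// the input the element at (p, v, s) is overwritten with on_value; indices outside
// [0, depth) are silently skipped.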
ctx.suffix_size); + } + }; + + uint32_t depth; + int32_t axis = -1; + InferenceEngine::SizeVector src_dims; + InferenceEngine::SizeVector dst_dims; + + InferenceEngine::Precision output_precision; + + std::string errorPrefix; + + static const size_t INDICES_ID = 0; + static const size_t DEPTH_ID = 1; + static const size_t ON_VALUE_ID = 2; + static const size_t OFF_VALUEAXES_ID = 3; + + template + void one_hot(size_t prefix_size, size_t suffix_size); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp index b4e812838f5..b4ef82481ca 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp @@ -3,7 +3,6 @@ // #include "mkldnn_pad_node.h" -#include #include #include #include @@ -13,54 +12,102 @@ #include "common/cpu_memcpy.h" #include "utils/bfloat16.hpp" #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNPadNode::MKLDNNPadNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} - -void MKLDNNPadNode::getSupportedDescriptors() { - auto* padLayer = dynamic_cast(getCnnLayer().get()); - if (padLayer == nullptr) - IE_THROW() << "Cannot convert Pad layer."; - - padsBegin = padLayer->GetParamAsUInts("pads_begin"); - padsEnd = padLayer->GetParamAsUInts("pads_end"); - - SizeVector srcDims = padLayer->insData[0].lock()->getTensorDesc().getDims(); - SizeVector dstDims = padLayer->outData[0]->getTensorDesc().getDims(); - if (srcDims.size() != dstDims.size() || padsBegin.size() != srcDims.size() || padsEnd.size() != srcDims.size()) - IE_THROW() << padLayer->name << " Incorrect number of input/output dimensions!"; - - std::string pad_mode = padLayer->GetParamAsString("pad_mode"); - if (pad_mode == "constant") { - padMode = CONSTANT; - padValue = padLayer->GetParamAsFloat("pad_value", 0.f); - } else if (pad_mode == "edge") { - padMode = EDGE; - } else if (pad_mode == "reflect") { - padMode = REFLECT; - for (size_t i = 0; i < srcDims.size(); i++) { - if ((srcDims[i] - 1) < padsBegin[i] || (srcDims[i] - 1) < padsEnd[i]) - IE_THROW() << padLayer->name << " Incorrect padsBegin or padsEnd for 'reflect' pad mode"; +bool MKLDNNPadNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto pad = std::dynamic_pointer_cast(op); + if (!pad) { + errorMessage = "Only opset1 Pad operation is supported"; + return false; } - } else if (pad_mode == "symmetric") { - padMode = SYMMETRIC; - for (size_t i = 0; i < srcDims.size(); i++) { - if (srcDims[i] < padsBegin[i] || srcDims[i] < padsEnd[i]) - IE_THROW() << padLayer->name << " Incorrect padsBegin or padsEnd for 'symmetric' pad mode"; + if (std::dynamic_pointer_cast(pad->get_input_node_shared_ptr(PADS_BEGIN_ID)) == nullptr || + std::dynamic_pointer_cast(pad->get_input_node_shared_ptr(PADS_END_ID)) == nullptr || + (pad->get_pad_mode() == ngraph::op::PadMode::CONSTANT && pad->get_input_size() == 4 && + std::dynamic_pointer_cast(pad->get_input_node_shared_ptr(PAD_VALUE_ID)) == nullptr)) { + errorMessage = "Only Constant operation on 'pads_begin', 'pads_end', 'pad_value' inpus is supported"; + return false; + } + const auto pad_mode = pad->get_pad_mode(); + if (pad_mode != ngraph::op::PadMode::CONSTANT && pad_mode != ngraph::op::PadMode::EDGE && pad_mode != 
ngraph::op::PadMode::REFLECT && + pad_mode != ngraph::op::PadMode::SYMMETRIC) { + errorMessage = "Has unsupported pad_mode: " + ngraph::as_string(pad_mode); + return false; + } + const auto pb = pad->get_pads_begin(); + const auto pe = pad->get_pads_end(); + if (std::count_if(pb.begin(), pb.end(), [](ptrdiff_t x) { return x < 0; }) != 0 || + std::count_if(pe.begin(), pe.end(), [](ptrdiff_t x) { return x < 0; }) != 0) { + errorMessage = "Doesn't support 'pads_begin' or 'pads_end' negative value"; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNPadNode::MKLDNNPadNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Pad node with name '" + op->get_friendly_name() + "'"; + const auto pad = std::dynamic_pointer_cast(op); + + const auto pb = pad->get_pads_begin(); + const auto pe = pad->get_pads_end(); + for (size_t i = 0; i < pb.size(); i++) + padsBegin.push_back(static_cast(pb[i])); + for (size_t i = 0; i < pe.size(); i++) + padsEnd.push_back(static_cast(pe[i])); + + const auto pad_mode = pad->get_pad_mode(); + isPadValueSpecified = pad->get_input_size() == 4; + if (pad_mode == ngraph::op::PadMode::CONSTANT) { + padMode = CONSTANT; + if (isPadValueSpecified) { + if (!ngraph::is_scalar(pad->get_input_shape(PAD_VALUE_ID))) + IE_THROW() << errorPrefix << " has non scalar 'pad_value' input"; + padValue = std::dynamic_pointer_cast(pad->get_input_node_shared_ptr(PAD_VALUE_ID))->cast_vector()[0]; + } + } else if (pad_mode == ngraph::op::PadMode::EDGE) { + padMode = EDGE; + } else if (pad_mode == ngraph::op::PadMode::REFLECT) { + padMode = REFLECT; + } else if (pad_mode == ngraph::op::PadMode::SYMMETRIC) { + padMode = SYMMETRIC; } } else { - IE_THROW() << padLayer->name - << " Incorrect pad_mode. 
Only constants|edge|reflect|symmetric modes are supported!"; + IE_THROW(NotImplemented) << errorMessage; } +} - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); +void MKLDNNPadNode::getSupportedDescriptors() { + if (getParentEdges().size() != 3 && getParentEdges().size() != 4) + IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); + IE_THROW() << errorPrefix << "Incorrect number of output edges"; + + const SizeVector srcDims = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); + const SizeVector dstDims = getChildEdgeAt(DATA_ID)->getDims().ToSizeVector(); + if (srcDims.size() != dstDims.size() || padsBegin.size() != srcDims.size() || padsEnd.size() != srcDims.size()) + IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!"; + + if (padMode == REFLECT) { + for (size_t i = 0; i < srcDims.size(); i++) { + if ((srcDims[i] - 1) < padsBegin[i] || (srcDims[i] - 1) < padsEnd[i]) + IE_THROW() << errorPrefix << " has incorrect padsBegin or padsEnd for 'reflect' pad mode"; + } + } else if (padMode == SYMMETRIC) { + for (size_t i = 0; i < srcDims.size(); i++) { + if (srcDims[i] < padsBegin[i] || srcDims[i] < padsEnd[i]) + IE_THROW() << errorPrefix << " has incorrect padsBegin or padsEnd for 'symmetric' pad mode"; + } + } } void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { @@ -70,26 +117,26 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { std::vector supportedPrecisions = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::I32, InferenceEngine::Precision::BF16, InferenceEngine::Precision::I8, InferenceEngine::Precision::U8}; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(DATA_ID); if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), precision) == supportedPrecisions.end()) precision = precision.is_float() ? InferenceEngine::Precision::FP32 : InferenceEngine::Precision::I32; auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); int numOfDims = srcDims.ToSizeVector().size(); InferenceEngine::LayerConfig config; config.dynBatchSupport = false; - config.inConfs.resize(1); + config.inConfs.resize(isPadValueSpecified ? 
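// The REFLECT / SYMMETRIC checks above bound the pads by the source dimension: for a
// dimension of size 4, REFLECT allows at most 3 padded elements per side (mirroring never
// repeats the border element), while SYMMETRIC allows up to 4 (the border element is
// repeated).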
4 : 3); config.outConfs.resize(1); - config.inConfs[0].inPlace = -1; - config.inConfs[0].constant = false; - config.outConfs[0].inPlace = -1; - config.outConfs[0].constant = false; auto pushSupportedPrimitiveDescriptor = [&](memory::format_tag memoryFormat) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), dataType, memoryFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, memoryFormat); + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), dataType, memoryFormat); + config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(PADS_BEGIN_ID)->getDims(), memory::data_type::s32, memory::format_tag::x); + config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(PADS_END_ID)->getDims(), memory::data_type::s32, memory::format_tag::x); + if (isPadValueSpecified) + config.inConfs[3].desc = MKLDNNMemoryDesc(getParentEdgeAt(PAD_VALUE_ID)->getDims(), memory::data_type::f32, memory::format_tag::x); + config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(DATA_ID)->getDims(), dataType, memoryFormat); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, memoryFormat}); }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h index 1c598e497d0..8be96b2bea6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h @@ -12,7 +12,7 @@ namespace MKLDNNPlugin { class MKLDNNPadNode : public MKLDNNNode { public: - MKLDNNPadNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNPadNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNPadNode() override = default; void getSupportedDescriptors() override; @@ -21,6 +21,8 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: enum PadMode { CONSTANT = 0, @@ -63,6 +65,14 @@ private: node->padConstantCommon(); } }; + + std::string errorPrefix; + static const size_t DATA_ID = 0; + static const size_t PADS_BEGIN_ID = 1; + static const size_t PADS_END_ID = 2; + static const size_t PAD_VALUE_ID = 3; + + bool isPadValueSpecified = false; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index 2f5c459fdd8..c7a007d0c6f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -4,25 +4,61 @@ #include "mkldnn_pooling_node.h" -#include "mkldnn_quantize_node.h" +#include "mkldnn_fake_quantize_node.h" #include "mkldnn_conv_node.h" #include "mkldnn_concat_node.h" -#include #include #include #include #include #include -#include #include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNPoolingNode::MKLDNNPoolingNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + auto maxPoolOp = ngraph::as_type_ptr(op); + auto avgPoolOp = ngraph::as_type_ptr(op); + if 
(maxPoolOp) { + algorithm = PoolingMax; + exclude_pad = false; + + for (int i = 0; i < maxPoolOp->get_strides().size(); i++) { + stride.push_back(static_cast(maxPoolOp->get_strides()[i])); + } + for (int i = 0; i < maxPoolOp->get_kernel().size(); i++) { + kernel.push_back(static_cast(maxPoolOp->get_kernel()[i])); + } + for (int i = 0; i < maxPoolOp->get_pads_begin().size(); i++) { + data_pad_begin.push_back(static_cast(maxPoolOp->get_pads_begin()[i])); + } + for (int i = 0; i < maxPoolOp->get_pads_end().size(); i++) { + data_pad_end.push_back(static_cast(maxPoolOp->get_pads_end()[i])); + } + } else if (avgPoolOp) { + algorithm = PoolingAvg; + exclude_pad = avgPoolOp->get_exclude_pad(); + + for (int i = 0; i < avgPoolOp->get_strides().size(); i++) { + stride.push_back(static_cast(avgPoolOp->get_strides()[i])); + } + for (int i = 0; i < avgPoolOp->get_kernel().size(); i++) { + kernel.push_back(static_cast(avgPoolOp->get_kernel()[i])); + } + for (int i = 0; i < avgPoolOp->get_pads_begin().size(); i++) { + data_pad_begin.push_back(static_cast(avgPoolOp->get_pads_begin()[i])); + } + for (int i = 0; i < avgPoolOp->get_pads_end().size(); i++) { + data_pad_end.push_back(static_cast(avgPoolOp->get_pads_end()[i])); + } + } else { + IE_THROW(NotImplemented) + << "CPU Pooling node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); + } +} std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { if (dims.ndims() == 0) @@ -44,27 +80,21 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { if (!descs.empty()) return; - auto * poolingLayer = dynamic_cast(getCnnLayer().get()); - if (poolingLayer == nullptr) - IE_THROW() << "Cannot convert pooling layer."; - if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - type = poolingLayer->_type; - exclude_pad = poolingLayer->_exclude_pad; + inputPrecision = getOriginalInputPrecisionAtPort(0); + outputPrecision = getOriginalOutputPrecisionAtPort(0); - inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); - outputPrecision = getCnnLayer()->outData[0]->getPrecision(); - // Dirty WA to support stat based quantization approach - if (this->getCnnLayer()->precision != Precision::I8 - && inputPrecision != Precision::BF16) { - if (type == PoolingLayer::MAX) { + // WA: LPT transformation has WA which allows average pooling has I8/U8 output precision instead of FP32, + // so we explicitly set output precision as FP32 + if (outputPrecision != Precision::I8 && inputPrecision != Precision::BF16) { + if (getAlgorithm() == PoolingMax) { // MKLDNN supports only equal precisions for input and output outputPrecision = inputPrecision; - } else if (type == PoolingLayer::AVG) { + } else if (getAlgorithm() == PoolingAvg) { outputPrecision = Precision::FP32; } } @@ -73,20 +103,12 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { } if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } + outputPrecision = fusedWith.back()->getOriginalOutputPrecisionAtPort(0); } auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); - invertVectorCopyUtoI(poolingLayer->_stride, stride); - 
invertVectorCopyUtoI(poolingLayer->_kernel, kernel); - auto allPads = getPaddings(*poolingLayer); - invertVectorCopyUtoI(allPads.begin, data_pad_begin); - invertVectorCopyUtoI(allPads.end, data_pad_end); effective_pad_begin = data_pad_begin; effective_pad_end.resize(data_pad_end.size()); @@ -155,8 +177,8 @@ void MKLDNNPoolingNode::createDescriptor(const std::vector(node.get()); - if (quantizeNode) { - quantizeNode->appendPostOps(ops); + auto* fakeQuantizeNode = dynamic_cast(node.get()); + if (fakeQuantizeNode) { + fakeQuantizeNode->appendPostOps(ops); continue; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h index 7725f470311..5cb524fa817 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNPoolingNode : public MKLDNNNode { public: - MKLDNNPoolingNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNPoolingNode() override = default; void createDescriptor(const std::vector& inputDesc, @@ -32,7 +32,6 @@ public: private: void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false); - InferenceEngine::PoolingLayer::PoolType type = InferenceEngine::PoolingLayer::MAX; bool exclude_pad = false; std::vector stride; std::vector kernel; @@ -52,8 +51,6 @@ private: InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::FP32; InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32; - - std::vector PostOpsIntBlobMemory; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp new file mode 100644 index 00000000000..521dd248bdd --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp @@ -0,0 +1,558 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "utils/bfloat16.hpp" +#include +#include +#include "mkldnn_psroi_pooling_node.h" +#include +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using namespace mkldnn; +using namespace mkldnn::impl; +using namespace mkldnn::impl::cpu::x64; +using namespace mkldnn::impl::utils; + +bool MKLDNNPSROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto psroi = std::dynamic_pointer_cast(op); + const auto defPsroi = std::dynamic_pointer_cast(op); + if (!psroi && !defPsroi) { + errorMessage = "Only opset1 PSROIPooling and DeformablePSROIPooling operations are supported"; + return false; + } + + std::string mode; + if (psroi) { + mode = psroi->get_mode(); + if (mode != "average" && mode != "bilinear") { + errorMessage = "Doesn't support mode: " + mode; + return false; + } + } else if (defPsroi) { + mode = defPsroi->get_mode(); + if (mode != "bilinear_deformable") { + errorMessage = "Doesn't support mode: " + mode; + return false; + } + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNPSROIPoolingNode::MKLDNNPSROIPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = std::string(op->get_type_name()) + " node with name '" + op->get_friendly_name() + "'"; + + const auto psroi = std::dynamic_pointer_cast(op); + const auto defPsroi = std::dynamic_pointer_cast(op); + + noTrans = op->get_input_size() == 2; + if (op->get_input_shape(0).size() != 4) + IE_THROW() << errorPrefix << " has first input with incorrect rank: " + std::to_string(op->get_input_shape(0).size()); + if (op->get_input_shape(1).size() != 2) + IE_THROW() << errorPrefix << " has second input with incorrect rank: " + std::to_string(op->get_input_shape(1).size()); + if (!noTrans && op->get_input_shape(2).size() != 4) + IE_THROW() << errorPrefix << " has third input with incorrect rank: " + std::to_string(op->get_input_shape(2).size()); + + if (psroi) { + if (psroi->get_input_size() != 2) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + mode = psroi->get_mode(); + if (mode == "average") { + algorithm = Algorithm::PSROIPoolingAverage; + } else if (mode == "bilinear") { + algorithm = Algorithm::PSROIPoolingBilinear; + } + + outputDim = static_cast(psroi->get_output_dim()); + spatialScale = psroi->get_spatial_scale(); + groupSize = static_cast(psroi->get_group_size()); + mode = psroi->get_mode(); + spatialBinsX = static_cast(psroi->get_spatial_bins_x()); + spatialBinsY = static_cast(psroi->get_spatial_bins_y()); + pooledHeight = groupSize; + pooledWidth = groupSize; + + } else if (defPsroi) { + if (defPsroi->get_input_size() != 2 && defPsroi->get_input_size() != 3) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + algorithm = Algorithm::PSROIPoolingBilinearDeformable; + + outputDim = static_cast(defPsroi->get_output_dim()); + spatialScale = defPsroi->get_spatial_scale(); + groupSize = static_cast(defPsroi->get_group_size()); + mode = defPsroi->get_mode(); + spatialBinsX = static_cast(defPsroi->get_spatial_bins_x()); + spatialBinsY = static_cast(defPsroi->get_spatial_bins_y()); + transStd = defPsroi->get_trans_std(); + partSize = static_cast(defPsroi->get_part_size()); + // temporary workaround due to incorrect usage of group_size in the nGraph operation for the DeformablePSROIPooling + pooledHeight = groupSize; + pooledWidth = groupSize; + } + + ngraph::Shape inDims = op->get_input_shape(0); + channels = static_cast(inDims[1]); + height = static_cast(inDims[2]); + width = static_cast(inDims[3]); + + ngraph::Shape outDims = op->get_shape(); + nn = static_cast(outDims[0]); + nc = static_cast(outDims[1]); + nh = static_cast(outDims[2]); + nw = static_cast(outDims[3]); +} + +void MKLDNNPSROIPoolingNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + impl_desc_type impl_type; + if (mayiuse(cpu::x64::avx512_common)) { + impl_type = impl_desc_type::jit_avx512; + } else if (mayiuse(cpu::x64::avx2)) { + impl_type = impl_desc_type::jit_avx2; + } else if (mayiuse(cpu::x64::sse41)) { + impl_type = impl_desc_type::jit_sse42; + } else { + impl_type = impl_desc_type::ref; + } + + auto dataPrecision = getOriginalInputPrecisionAtPort(0) == Precision::BF16 ? 
Precision::BF16 : Precision::FP32; + + if (getAlgorithm() == Algorithm::PSROIPoolingAverage || getAlgorithm() == Algorithm::PSROIPoolingBilinear) { + std::vector> dataFomats{ + {TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::nspc, TensorDescCreatorTypes::nspc}, + {TensorDescCreatorTypes::nCsp16c, TensorDescCreatorTypes::nCsp16c}, + {TensorDescCreatorTypes::nCsp8c, TensorDescCreatorTypes::nCsp8c} + }; + + for (const auto &df : dataFomats) { + addSupportedPrimDesc({{df.first, dataPrecision}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{df.second, dataPrecision}}, + impl_type); + } + } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable && noTrans) { + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + impl_type); + } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable) { + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + impl_type); + } +} + +template +inline float bilinearInterp(const inputType* data, const float x, const float y, const int width_) { + int x1 = static_cast(std::floor(x)); + int x2 = static_cast(std::ceil(x)); + int y1 = static_cast(std::floor(y)); + int y2 = static_cast(std::ceil(y)); + float distX = x - x1; + float distY = y - y1; + + float value11 = data[y1 * width_ + x1]; + float value12 = data[y2 * width_ + x1]; + float value21 = data[y1 * width_ + x2]; + float value22 = data[y2 * width_ + x2]; + float value = (1 - distX) * (1 - distY) * value11 + (1 - distX) * distY * value12 + + distX * (1 - distY) * value21 + distX * distY * value22; + return value; +} + +void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const TensorDesc& dstDesc, + int& hInputStride, int& wInputStride, + int& hOutputStride, int& wOutputStride, + Layout& inFmt, Layout& outFmt, + int& inBlockSize, int& outBlockSize, + int& outBlockCount, + unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding) { + inFmt = srcDesc.getLayout(); + outFmt = dstDesc.getLayout(); + int expectedInBlockDimsSize = (inFmt == Layout::BLOCKED ? 5 : 4); + int expectedOutBlockDimsSize = (outFmt == Layout::BLOCKED ? 5 : 4); + auto inBlkDims = srcDesc.getBlockingDesc().getBlockDims(); + auto outBlkDims = dstDesc.getBlockingDesc().getBlockDims(); + if (inBlkDims.size() != expectedInBlockDimsSize) + IE_THROW() << errorPrefix << " has unexpected size of blocking dims in input (given " << inBlkDims.size() << ", expected " + << expectedInBlockDimsSize << ")"; + if (outBlkDims.size() != expectedOutBlockDimsSize) + IE_THROW() << errorPrefix << " has unexpected size of blocking dims in output (given " << outBlkDims.size() << ", expected " + << expectedOutBlockDimsSize << ")"; + + inBlockSize = (inFmt == Layout::BLOCKED ? srcDesc.getBlockingDesc().getBlockDims()[4] : 1); + outBlockSize = (outFmt == Layout::BLOCKED ? 
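// bilinearInterp() above blends the four integer neighbours of (x, y) with weights built
// from the fractional parts distX, distY: e.g. for x = 1.25, y = 2.5 it returns
// 0.375 * data(y=2, x=1) + 0.375 * data(y=3, x=1) + 0.125 * data(y=2, x=2) + 0.125 * data(y=3, x=2),
// i.e. weights (1-distX)(1-distY), (1-distX)*distY, distX*(1-distY) and distX*distY.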
dstDesc.getBlockingDesc().getBlockDims()[4] : 1); + inputChannelsPadding = srcDesc.getBlockingDesc().getBlockDims()[1] * inBlockSize; + outputChannelsPadding = dstDesc.getBlockingDesc().getBlockDims()[1] * outBlockSize; + outBlockCount = outputChannelsPadding / outBlockSize; + + int hOutStrIndex = 0, wOutStrIndex = 0, hInStrIndex = 0, wInStrIndex = 0; + const auto& outOrder = dstDesc.getBlockingDesc().getOrder(); + const auto& inOrder = srcDesc.getBlockingDesc().getOrder(); + for (int i = 0; i < outOrder.size(); i++) { + if (outOrder[i] == 2) hOutStrIndex = i; + if (outOrder[i] == 3) wOutStrIndex = i; + } + for (int i = 0; i < inOrder.size(); i++) { + if (inOrder[i] == 2) hInStrIndex = i; + if (inOrder[i] == 3) wInStrIndex = i; + } + hInputStride = srcDesc.getBlockingDesc().getStrides()[hInStrIndex]; + wInputStride = srcDesc.getBlockingDesc().getStrides()[wInStrIndex]; + hOutputStride = dstDesc.getBlockingDesc().getStrides()[hOutStrIndex]; + wOutputStride = dstDesc.getBlockingDesc().getStrides()[wOutStrIndex]; +} + +template +void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, + const int n, const int roiBatchInd, + const TensorDesc& srcDesc, const TensorDesc& dstDesc) { + Layout inFmt, outFmt; + int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; + unsigned long inputChannelsPadding, outputChannelsPadding; + unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, + inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); + const float roiStartW = static_cast(round(bottomRois[1])) * spatialScale; + const float roiStartH = static_cast(round(bottomRois[2])) * spatialScale; + const float roiEndW = static_cast(round(bottomRois[3] + 1.0f)) * spatialScale; + const float roiEndH = static_cast(round(bottomRois[4] + 1.0f)) * spatialScale; + // Force too small ROIs to be 1x1 + const float roiWidth = std::max(roiEndW - roiStartW, 0.1f); // avoid 0 + const float roiHeight = std::max(roiEndH - roiStartH, 0.1f); + + auto avgPsroi = [&] (int c, int h, int w, int binOffIn, int binOffOut, int inBlkRes, int outBlkRes) { + float binSizeH = roiHeight / static_cast(pooledHeight); + float binSizeW = roiWidth / static_cast(pooledWidth); + + int hStart = static_cast(floor(static_cast(h + 0) * binSizeH + roiStartH)); + int hEnd = static_cast(ceil(static_cast(h + 1) * binSizeH + roiStartH)); + + hStart = std::min(std::max(hStart, 0), height); + hEnd = std::min(std::max(hEnd, 0), height); + int wStart = static_cast(floor(static_cast(w + 0) * binSizeW + roiStartW)); + int wEnd = static_cast(ceil(static_cast(w + 1) * binSizeW + roiStartW)); + + wStart = std::min(std::max(wStart, 0), width); + wEnd = std::min(std::max(wEnd, 0), width); + + const float binArea = static_cast((hEnd - hStart) * (wEnd - wStart)); + + size_t dstIndex = binOffOut + h * hOutputStride + w * wOutputStride + outBlkRes; + dstData[dstIndex] = 0; + if (binArea) { + float outSum = 0.0f; + const int heightIndexBound = hEnd * hInputStride; + const int widthIndexBound = wEnd * wInputStride; + for (int hh = hStart * hInputStride; hh < heightIndexBound; hh += hInputStride) { + for (int ww = wStart * wInputStride; ww < widthIndexBound; ww += wInputStride) { + outSum += srcData[binOffIn + hh + ww + inBlkRes]; + } + } + dstData[dstIndex] = outSum / binArea; + } + }; + if (inFmt == Layout::NHWC) { + parallel_for2d(nh, nw, [&](int h, int w) { + const int 
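// Average mode: the ROI (scaled by spatialScale and forced to be at least 0.1 x 0.1) is
// split into pooledHeight x pooledWidth bins; output channel c of bin (h, w) averages the
// input values of channel gc = (c * groupSize + h) * groupSize + w inside that bin, and an
// empty bin (zero area after clipping to the feature map) stays 0.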
binOffsetOutput = n * nc * nh * nw; + const int binOffsetInput = roiBatchInd * channels * height * width; + for (int c = 0; c < nc; c++) { + const int gc = (c * groupSize + h) * groupSize + w; + avgPsroi(c, h, w, 0, 0, binOffsetInput + gc, binOffsetOutput + c); + } + }); + } else if (inFmt == Layout::NCHW) { + parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { + const int gc = (c * groupSize + h) * groupSize + w; + const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); + const int outputBlockIdx = (c / outBlockSize) * outBlockSize; + const int binOffsetInput = (roiBatchInd * inputChannelsPadding + gc) * height * width; + const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw; + avgPsroi(c, h, w, 0, outputBlockResidual, binOffsetInput, binOffsetOutput); + }); + } else { // nChw16c, nChw8c + parallel_for3d(outBlockCount, nh, nw, [&](int blkIdx, int h, int w) { + int cStart = blkIdx * outBlockSize; + int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize); + for (int c = cStart; c < cEnd; c++) { + const int gc = (c * groupSize + h) * groupSize + w; + const int inputBlockResidual = (inFmt == Layout::NCHW ? 0 : gc % inBlockSize); + const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); + const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; + const int outputBlockIdx = (c / outBlockSize) * outBlockSize; + const int binOffsetInput = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; + const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw; + avgPsroi(c, h, w, inputBlockResidual, outputBlockResidual, binOffsetInput, binOffsetOutput); + } + }); + } +} + +template +void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, + const int currentRoi, const int roiBatchInd, + const TensorDesc& srcDesc, const TensorDesc& dstDesc) { + Layout inFmt, outFmt; + int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; + unsigned long inputChannelsPadding, outputChannelsPadding; + unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, + inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); + const float roiStartW = bottomRois[1] * spatialScale; + const float roiStartH = bottomRois[2] * spatialScale; + const float roiEndW = bottomRois[3] * spatialScale; + const float roiEndH = bottomRois[4] * spatialScale; + const float roiWidth = roiEndW - roiStartW; + const float roiHeight = roiEndH - roiStartH; + size_t numBins = spatialBinsX * spatialBinsY; + const int binCount = nh * nw; + + auto bilinearPsroi = [&] (int c, int h, int w, int binOffOut, int outBlkRes) { + float accum = 0.0f; + int binOffIn, inBlkRes; + size_t dstIndex = binOffOut + h * hOutputStride + w * wOutputStride + outBlkRes; + dstData[dstIndex] = 0; + + for (size_t binY = 0; binY < spatialBinsY; binY++) { + const float boxYmin = roiStartH + (binY + 0) * (roiHeight / spatialBinsY); + const float boxYmax = roiStartH + (binY + 1) * (roiHeight / spatialBinsY); + const float heightScale = nh > 1 ? (boxYmax - boxYmin) * (height - 1) / (pooledHeight - 1) : 0.0f; + const float inY = nh > 1 ? 
(h * heightScale + boxYmin * (height - 1)) : 0.5f * (boxYmin + boxYmax) * (height - 1); + for (size_t binX = 0; binX < spatialBinsX; binX++) { + size_t gc = c + (binY * spatialBinsX + binX) * nc; + if (inFmt == Layout::NHWC) { + binOffIn = roiBatchInd * channels * height * width + gc; + inBlkRes = 0; + } else { // nchw, nChw16c, nChw8c + const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; + binOffIn = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; + inBlkRes = (inFmt == Layout::BLOCKED ? gc % inBlockSize : 0); + } + const auto *bottomData = srcData + binOffIn; + + const float boxXmin = roiStartW + (binX + 0) * (roiWidth / spatialBinsX); + const float boxXmax = roiStartW + (binX + 1) * (roiWidth / spatialBinsX); + + const float widthScale = nw > 1 ? (boxXmax - boxXmin) * (width - 1) / (pooledWidth - 1) : 0.0f; + const float inX = nw > 1 ? (w * widthScale + boxXmin * (width - 1)) : 0.5f * (boxXmin + boxXmax) * (width - 1); + + if (!(inY < 0 || inY > height - 1 || inX < 0 || inX > width - 1)) { + const int topYIndex = static_cast(floorf(inY)); + int bottomYIndex = static_cast(ceilf(inY)); + const int leftXIndex = static_cast(floorf(inX)); + int rightXIndex = static_cast(ceilf(inX)); + + if (rightXIndex > width - 1) rightXIndex = width - 1; + if (bottomYIndex > height - 1) bottomYIndex = height - 1; + + auto topLeftIndex = topYIndex * hInputStride + leftXIndex * wInputStride + inBlkRes; + auto topRightIndex = topYIndex * hInputStride + rightXIndex * wInputStride + inBlkRes; + auto bottomLeftIndex = bottomYIndex * hInputStride + leftXIndex * wInputStride + inBlkRes; + auto bottomRightIndex = bottomYIndex * hInputStride + rightXIndex * wInputStride + inBlkRes; + + const float topLeft = bottomData[topLeftIndex]; + const float topRight = bottomData[topRightIndex]; + const float bottomLeft = bottomData[bottomLeftIndex]; + const float bottomRight = bottomData[bottomRightIndex]; + + const float top = topLeft + (topRight - topLeft) * (inX - leftXIndex); + const float bottom = bottomLeft + (bottomRight - bottomLeft) * (inX - leftXIndex); + + accum += top + (bottom - top) * (inY - topYIndex); + } + } + } + accum /= numBins; + dstData[dstIndex] = accum; + }; + + if (inFmt == Layout::NHWC) { + const int binOffsetOutput = currentRoi * nc * nh * nw; + parallel_for2d(nh, nw, [&](int h, int w) { + for (int c = 0; c < nc; c++) { + bilinearPsroi(c, h, w, 0, binOffsetOutput + c); + } + }); + } else if (inFmt == Layout::NCHW) { + parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { + bilinearPsroi(c, h, w, 0, (currentRoi * outputChannelsPadding + c) * binCount); + }); + } else { // nChw16c, nChw8c + parallel_for3d(outBlockCount, nh, nw, [&](int blkIdx, int h, int w) { + int cStart = blkIdx * outBlockSize; + int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize); + for (int c = cStart; c < cEnd; c++) { + const int outputBlockIdx = (c / inBlockSize) * inBlockSize; + const int binOffsetOutput = (currentRoi * outputChannelsPadding + outputBlockIdx) * binCount; + const int outputBlockResidual = (inFmt == Layout::BLOCKED ? 
c % inBlockSize : 0); + bilinearPsroi(c, h, w, outputBlockResidual, binOffsetOutput); + } + }); + } +} + +template +void MKLDNNPSROIPoolingNode::executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, + const float *bottomTrans, const int numClasses, const int channelsEachClass, + const int currentRoi, const int roiBatchInd) { + const float roiStartW = static_cast(round(bottomRois[1])) * spatialScale - 0.5f; + const float roiStartH = static_cast(round(bottomRois[2])) * spatialScale - 0.5f; + const float roiEndW = static_cast(round(bottomRois[3]) + 1.0f) * spatialScale - 0.5f; + const float roiEndH = static_cast(round(bottomRois[4]) + 1.0f) * spatialScale - 0.5f; + // Force too small ROIs to be 1x1 + const float roiWidth = std::max(roiEndW - roiStartW, 0.1f); // avoid 0 + const float roiHeight = std::max(roiEndH - roiStartH, 0.1f); + parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { + size_t dstIndex = ((currentRoi * nc + c) * nh + h) * nw + w; + dstData[dstIndex] = 0; + // Compute w and h at bottom + float binSizeH = roiHeight / static_cast(pooledHeight); + float binSizeW = roiWidth / static_cast(pooledWidth); + + float subBinSizeH = binSizeH / static_cast(spatialBinsX); + float subBinSizeW = binSizeW / static_cast(spatialBinsY); + + int partH = h * partSize / pooledHeight; + int partW = w * partSize / pooledWidth; + int classId = c / channelsEachClass; + float transX = noTrans ? 0 : + bottomTrans[(((currentRoi * numClasses + classId) * 2) * partSize + partH) + * partSize + partW] * transStd; + float transY = noTrans ? 0 : + bottomTrans[(((currentRoi * numClasses + classId) * 2 + 1) * partSize + partH) + * partSize + partW] * transStd; + + float wStart = w * binSizeW + roiStartW + transX * roiWidth; + float hStart = h * binSizeH + roiStartH + transY * roiHeight; + + float sum = 0; + int count = 0; + int gw = w * groupSize / pooledWidth; + int gh = h * groupSize / pooledHeight; + gw = (std::min)((std::max)(gw, 0), static_cast(groupSize - 1)); + gh = (std::min)((std::max)(gh, 0), static_cast(groupSize - 1)); + + const inputType* offsetBottomData = srcData + (roiBatchInd * channels) * height * width; + for (size_t ih = 0; ih < spatialBinsY; ih++) { + for (size_t iw = 0; iw < spatialBinsX; iw++) { + float w1 = wStart + iw * subBinSizeW; + float h1 = hStart + ih * subBinSizeH; + // bilinear interpolation + if (w1 < -0.5 || w1 > width - 0.5 || h1 < -0.5 || h1 > height - 0.5) + continue; + w1 = static_cast((std::min)((std::max)(static_cast(w1), 0.0), width - 1.0)); + h1 = static_cast((std::min)((std::max)(static_cast(h1), 0.0), height - 1.0)); + int c1 = static_cast((c * groupSize + gh) * groupSize + gw); + float val = bilinearInterp(offsetBottomData + + c1 * height * width, w1, h1, width); + + sum += val; + count++; + } + } + dstData[dstIndex] = count == 0 ? 
0 : sum / count; + }); +} + +template +void MKLDNNPSROIPoolingNode::executeSpecified() { + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + auto srcDesc = getParentEdgeAt(0)->getDesc(); + auto dstDesc = getChildEdgeAt(0)->getDesc(); + + int realRois = 0; + for (; realRois < nn; realRois++) { + int roiBatchInd = static_cast(bottomRoisBeginning[realRois * 5]); + if (roiBatchInd == -1) { + break; + } + } + + // for Deformable PSROIPooling + const float *bottomTrans = nullptr; + int numClasses = 1; + int channelsEachClass = outputDim; + if (!noTrans) { + bottomTrans = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + numClasses = static_cast(getParentEdgeAt(2)->getDesc().getDims()[1]) / 2; + channelsEachClass /= numClasses; + } + + parallel_for(realRois, [&](int currentRoi) { + const float *bottomRois = bottomRoisBeginning + currentRoi * 5; + int roiBatchInd = static_cast(bottomRois[0]); + if (getAlgorithm() == Algorithm::PSROIPoolingAverage) { + executeAverage(srcData, dstData, bottomRois, currentRoi, roiBatchInd, srcDesc, dstDesc); + } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinear) { + executeBilinear(srcData, dstData, bottomRois, currentRoi, roiBatchInd, srcDesc, dstDesc); + } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable) { + executeBilinearDeformable(srcData, dstData, bottomRois, bottomTrans, + numClasses, channelsEachClass, currentRoi, roiBatchInd); + } + }); + + memset(dstData + realRois * nc * nh * nw, 0, (nn - realRois) * nc * nh * nw * sizeof(outputType)); +} + +namespace { +struct PSROIPoolingContext { + MKLDNNPSROIPoolingNode &node; +}; +} + +template +struct MKLDNNPSROIPoolingNode::PSROIPoolingExecute { + using srcT = typename std::tuple_element<0, T>::type; + using dstT = typename std::tuple_element<1, T>::type; + + void operator()(PSROIPoolingContext & ctx) { + ctx.node.executeSpecified(); + } +}; + +void MKLDNNPSROIPoolingNode::execute(mkldnn::stream strm) { + auto inputPrec = getParentEdgesAtPort(0)[0]->getDesc().getPrecision(); + auto outputPrec = getChildEdgesAtPort(0)[0]->getDesc().getPrecision(); + + if (!((inputPrec == Precision::BF16 && outputPrec == Precision::BF16) || + (inputPrec == Precision::FP32 && outputPrec == Precision::FP32))) { + IE_THROW() << errorPrefix + " has different precisions on input: " + inputPrec.name() + " and output: " + outputPrec.name(); + } + + PSROIPoolingContext ctx = { + *this, + }; + + OV_SWITCH(MKLDNNPlugin, PSROIPoolingExecute, ctx, std::tie(inputPrec, outputPrec), + OV_CASE2(Precision::FP32, Precision::FP32, float, float), + OV_CASE2(Precision::BF16, Precision::BF16, bfloat16_t, bfloat16_t)) +} + +bool MKLDNNPSROIPoolingNode::created() const { + return getType() == PSROIPooling; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNPSROIPoolingNode, PSROIPooling) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h new file mode 100644 index 00000000000..dc668681e97 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNPSROIPoolingNode : public 
MKLDNNNode { +public: + MKLDNNPSROIPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNPSROIPoolingNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + size_t outputDim = 0; + size_t groupSize = 0; + float spatialScale = 0; + size_t pooledHeight = 0; + size_t pooledWidth = 0; + size_t spatialBinsX = 0; + size_t spatialBinsY = 0; + std::string mode = ""; + + int channels = 0; + int height = 0; + int width = 0; + + int nn = 0; + int nc = 0; + int nh = 0; + int nw = 0; + + // for Deformable PSROIPolling + bool noTrans; + int partSize; + float transStd; + + std::string errorPrefix; + + void unpackParams(const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc, + int& hInputStride, int& wInputStride, + int& hOutputStride, int& wOutputStride, + InferenceEngine::Layout& inFmt, InferenceEngine::Layout& outFmt, + int& inBlockSize, int& outBlockSize, + int& outBlockCount, + unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding); + + template + void executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, + const int n, const int roiBatchInd, + const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + + template + void executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, + const int currentRoi, const int roiBatchInd, + const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + + template + void executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, + const float *bottomTrans, const int numClasses, const int channelsEachClass, + const int currentRoi, const int roiBatchInd); + + template + void executeSpecified(); + + template + struct PSROIPoolingExecute; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp index 9a6bd0216dc..1494705b947 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp @@ -4,8 +4,7 @@ #include "mkldnn_reduce_node.h" -#include "mkldnn_quantize_node.h" -#include +#include "mkldnn_fake_quantize_node.h" #include #include #include @@ -22,6 +21,8 @@ #include #include #include +#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -98,15 +99,15 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene if (jcp_.planar_layout) mov(reg_reduce_w, ptr[reg_params + GET_OFF(reduce_w)]); - if (jcp_.reduce_mode == Reduce::And || jcp_.reduce_mode == Reduce::L1 || jcp_.reduce_mode == Reduce::Max || - jcp_.reduce_mode == Reduce::Min || jcp_.reduce_mode == Reduce::Prod || jcp_.reduce_mode == Reduce::Or) { + if (jcp_.reduce_mode == ReduceAnd || jcp_.reduce_mode == ReduceL1 || jcp_.reduce_mode == ReduceMax || + jcp_.reduce_mode == ReduceMin || jcp_.reduce_mode == ReduceProd || jcp_.reduce_mode == ReduceOr) { mov(reg_table, l_table); } - if (isa == cpu::x64::avx512_common || jcp_.reduce_mode == Reduce::And || jcp_.reduce_mode == Reduce::Or) + if (isa == 
cpu::x64::avx512_common || jcp_.reduce_mode == ReduceAnd || jcp_.reduce_mode == ReduceOr) uni_vpxor(vmm_zero, vmm_zero, vmm_zero); - if ((isa == cpu::x64::avx512_common && jcp_.reduce_mode == Reduce::And) || jcp_.reduce_mode == Reduce::Or) { + if ((isa == cpu::x64::avx512_common && jcp_.reduce_mode == ReduceAnd) || jcp_.reduce_mode == ReduceOr) { uni_vmovups(vmm_aux, table_val(0)); } @@ -118,10 +119,10 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) emu_vcvtneps2bf16->emit_data(); - if (jcp_.reduce_mode == Reduce::And || jcp_.reduce_mode == Reduce::L1 || jcp_.reduce_mode == Reduce::Max || - jcp_.reduce_mode == Reduce::Min || jcp_.reduce_mode == Reduce::Prod || jcp_.reduce_mode == Reduce::Or) { + if (jcp_.reduce_mode == ReduceAnd || jcp_.reduce_mode == ReduceL1 || jcp_.reduce_mode == ReduceMax || + jcp_.reduce_mode == ReduceMin || jcp_.reduce_mode == ReduceProd || jcp_.reduce_mode == ReduceOr) { prepare_aux_table(); - } else if (jcp_.reduce_mode == Reduce::LogSumExp) { + } else if (jcp_.reduce_mode == ReduceLogSumExp) { exp_injector->prepare_table(); } } @@ -168,7 +169,7 @@ private: inline void reduce_main() { // ================================================================ // ***isa: AVX512*** - // Reduce::And (Logical And) + // ReduceAnd (Logical And) // step 1: init dst 0x3f800000 (1.0f) // aux 0x3f800000 (1.0f) // zero 0x00000000 (0.0f) @@ -182,7 +183,7 @@ private: // case 4 0 0 0.0f 0.0f 0.0f // step 5: loop: offset src, and do step 2 and step 3 // - // Reduce::Or (Logical Or) + // ReduceOr (Logical Or) // step 1: init dst 0x00000000 (0.0f) // aux 0x3f800000 (1.0f) // zero 0x00000000 (0.0f) @@ -197,7 +198,7 @@ private: // step 5: loop: offset src, and do step 2 and step 3 // ================================================================ // ***isa: OTHER*** - // Reduce::And (Logical And) + // ReduceAnd (Logical And) // step 1: init dst 0x3f800000 (1.0f) // step 2: if src equals 0, set it 0x00000000, else set 0xffffffff // step 3: dst = dst & src @@ -207,7 +208,7 @@ private: // 0x00000000 = 0x00000000 & 0x00000000 (result: 0.0f) // step 4: loop: offset src, and do step 2 and step 3 // - // Reduce::Or (Logical Or) + // ReduceOr (Logical Or) // step 1: init dst 0x00000000 (0.0f) // aux 0x3f800000 (1.0f) // step 2: dst = dst | src @@ -238,7 +239,7 @@ private: cmp(reg_work_amount, step); jl(reduce_main_end_label, T_NEAR); //avoid illegal loading and storing - if (jcp_.reduce_mode == Reduce::L1) { + if (jcp_.reduce_mode == ReduceL1) { uni_vmovups(vmm_aux, table_val(1)); } @@ -281,30 +282,30 @@ private: { // init dst, dst loading is embedded in horiz_reduce_store switch (jcp_.reduce_mode) { - case Reduce::And: - case Reduce::Prod: + case ReduceAnd: + case ReduceProd: uni_vmovups(vmm_dst, table_val(0)); break; - case Reduce::L1: + case ReduceL1: uni_vmovups(vmm_aux, table_val(1)); uni_vpxor(vmm_dst, vmm_dst, vmm_dst); break; - case Reduce::L2: - case Reduce::LogSum: - case Reduce::LogSumExp: - case Reduce::Mean: - case Reduce::Or: - case Reduce::Sum: - case Reduce::SumSquare: + case ReduceL2: + case ReduceLogSum: + case ReduceLogSumExp: + case ReduceMean: + case ReduceOr: + case ReduceSum: + case ReduceSumSquare: uni_vpxor(vmm_dst, vmm_dst, vmm_dst); break; - case Reduce::Max: + case ReduceMax: if (isFloatCompatible(jcp_.dst_dt)) uni_vmovups(vmm_dst, table_val(2)); else uni_vmovups(vmm_dst, table_val(4)); break; - case Reduce::Min: + case ReduceMin: if (isFloatCompatible(jcp_.dst_dt)) 
uni_vmovups(vmm_dst, table_val(3)); else @@ -315,7 +316,7 @@ private: } // reduce reduce_main_loop(); - if (jcp_.reduce_mode == Reduce::Or && isa != avx512_common) { + if (jcp_.reduce_mode == ReduceOr && isa != avx512_common) { vcmpneqps(vmm_dst, vmm_dst, vmm_zero); uni_vandps(vmm_dst, vmm_dst, vmm_aux); } @@ -328,7 +329,7 @@ private: } inline void reduce_tail() { - if (jcp_.reduce_mode == Reduce::L1) { + if (jcp_.reduce_mode == ReduceL1) { uni_vmovups(xmm_aux, table_val(1)); } @@ -359,7 +360,7 @@ private: // reduce reduce_kernel_scalar(xmm_src, xmm_dst); - if (jcp_.reduce_mode == Reduce::Or) { + if (jcp_.reduce_mode == ReduceOr) { vcmpneqps(xmm_dst, xmm_dst, xmm_zero); uni_vandps(xmm_dst, xmm_dst, xmm_aux); } @@ -398,7 +399,7 @@ private: load_scalar(xmm_src, ptr[reg_src], jcp_.src_dt); reduce_kernel_scalar(xmm_src, xmm_dst); - if (jcp_.reduce_mode == Reduce::Or) { + if (jcp_.reduce_mode == ReduceOr) { vcmpneqps(xmm_dst, xmm_dst, xmm_zero); uni_vandps(xmm_dst, xmm_dst, xmm_aux); } @@ -446,7 +447,7 @@ private: inline void reduce_kernel(Vmm vmm_src, Vmm vmm_dst) { switch (jcp_.reduce_mode) { - case Reduce::And: + case ReduceAnd: if (isa == avx512_common) { vcmpps(k_mask, vmm_src, vmm_zero, _cmp_neq_uq); vblendmps(vmm_src | k_mask, vmm_zero, vmm_aux); @@ -455,38 +456,38 @@ private: } uni_vandps(vmm_dst, vmm_dst, vmm_src); break; - case Reduce::L1: + case ReduceL1: uni_vandps(vmm_src, vmm_src, vmm_aux); uni_vaddps(vmm_dst, vmm_dst, vmm_src); break; - case Reduce::LogSum: - case Reduce::Mean: - case Reduce::Sum: + case ReduceLogSum: + case ReduceMean: + case ReduceSum: uni_vaddps(vmm_dst, vmm_dst, vmm_src); break; - case Reduce::Max: + case ReduceMax: uni_vmaxps(vmm_dst, vmm_dst, vmm_src); break; - case Reduce::Min: + case ReduceMin: uni_vminps(vmm_dst, vmm_dst, vmm_src); break; - case Reduce::L2: - case Reduce::SumSquare: + case ReduceL2: + case ReduceSumSquare: uni_vmulps(vmm_src, vmm_src, vmm_src); uni_vaddps(vmm_dst, vmm_dst, vmm_src); break; - case Reduce::LogSumExp: + case ReduceLogSumExp: exp_injector->compute_vector_range(vmm_src.getIdx(), vmm_src.getIdx() + 1); uni_vaddps(vmm_dst, vmm_dst, vmm_src); break; - case Reduce::Or: + case ReduceOr: if (isa == avx512_common) { vcmpps(k_mask, vmm_src, vmm_zero, _cmp_neq_uq); vblendmps(vmm_src | k_mask, vmm_zero, vmm_aux); } uni_vorps(vmm_dst, vmm_dst, vmm_src); break; - case Reduce::Prod: + case ReduceProd: uni_vmulps(vmm_dst, vmm_dst, vmm_src); break; default: @@ -496,38 +497,38 @@ private: inline void reduce_kernel_scalar(Xmm xmm_src, Xmm xmm_dst) { switch (jcp_.reduce_mode) { - case Reduce::And: + case ReduceAnd: vcmpneqps(xmm_src, xmm_src, xmm_zero); uni_vandps(xmm_dst, xmm_dst, xmm_src); break; - case Reduce::L1: + case ReduceL1: uni_vandps(xmm_src, xmm_src, xmm_aux); uni_vaddps(xmm_dst, xmm_dst, xmm_src); break; - case Reduce::LogSum: - case Reduce::Mean: - case Reduce::Sum: + case ReduceLogSum: + case ReduceMean: + case ReduceSum: uni_vaddps(xmm_dst, xmm_dst, xmm_src); break; - case Reduce::Max: + case ReduceMax: uni_vmaxps(xmm_dst, xmm_dst, xmm_src); break; - case Reduce::Min: + case ReduceMin: uni_vminps(xmm_dst, xmm_dst, xmm_src); break; - case Reduce::L2: - case Reduce::SumSquare: + case ReduceL2: + case ReduceSumSquare: uni_vmulps(xmm_src, xmm_src, xmm_src); uni_vaddps(xmm_dst, xmm_dst, xmm_src); break; - case Reduce::LogSumExp: + case ReduceLogSumExp: exp_injector->compute_vector_range(xmm_src.getIdx(), xmm_src.getIdx() + 1); uni_vaddps(xmm_dst, xmm_dst, xmm_src); break; - case Reduce::Or: + case ReduceOr: uni_vorps(xmm_dst, 
xmm_dst, xmm_src); break; - case Reduce::Prod: + case ReduceProd: uni_vmulps(xmm_dst, xmm_dst, xmm_src); break; default: @@ -542,7 +543,7 @@ private: } inline void store_dst_vector() { - if (jcp_.reduce_mode == Reduce::Or && isa != avx512_common) { + if (jcp_.reduce_mode == ReduceOr && isa != avx512_common) { vcmpneqps(vmm_dst, vmm_dst, vmm_zero); uni_vandps(vmm_dst, vmm_dst, vmm_aux); if (isa == cpu::x64::sse41) { @@ -759,28 +760,28 @@ private: inline void horiz_ps(const Xmm& xmm, const Operand& op) { switch (jcp_.reduce_mode) { - case Reduce::And: + case ReduceAnd: andps(xmm, op); break; - case Reduce::L1: - case Reduce::L2: - case Reduce::LogSum: - case Reduce::Mean: - case Reduce::Sum: - case Reduce::SumSquare: - case Reduce::LogSumExp: + case ReduceL1: + case ReduceL2: + case ReduceLogSum: + case ReduceMean: + case ReduceSum: + case ReduceSumSquare: + case ReduceLogSumExp: addps(xmm, op); break; - case Reduce::Max: + case ReduceMax: maxps(xmm, op); break; - case Reduce::Min: + case ReduceMin: minps(xmm, op); break; - case Reduce::Or: + case ReduceOr: orps(xmm, op); break; - case Reduce::Prod: + case ReduceProd: mulps(xmm, op); break; default: @@ -854,7 +855,7 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) emu_vcvtneps2bf16->emit_data(); - if (jcp_.reduce_mode == Reduce::LogSum || jcp_.reduce_mode == Reduce::LogSumExp) { + if (jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) { log_injector->prepare_table(); } } @@ -937,9 +938,9 @@ private: // cases: [ReduceL2] [ReduceLogSum] [ReduceLogSumExp] [ReduceMean] L(reduce_map_label); { - if (jcp_.reduce_mode == Reduce::L2 || jcp_.reduce_mode == Reduce::Mean || - jcp_.reduce_mode == Reduce::LogSum || jcp_.reduce_mode == Reduce::LogSumExp) { - if (jcp_.reduce_mode == Reduce::Mean) + if (jcp_.reduce_mode == ReduceL2 || jcp_.reduce_mode == ReduceMean || + jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) { + if (jcp_.reduce_mode == ReduceMean) uni_vbroadcastss(vmm_aux, ptr[reg_divisor]); Xbyak::Label reduce_loop_label; @@ -979,9 +980,9 @@ private: inline void reduce_post_tail() { // reduce map for tail in dst memory // cases: [ReduceL2] [ReduceLogSum] [ReduceLogSumExp] [ReduceMean] in planar layout - if (jcp_.reduce_mode == Reduce::L2 || jcp_.reduce_mode == Reduce::Mean || - jcp_.reduce_mode == Reduce::LogSum || jcp_.reduce_mode == Reduce::LogSumExp) { - if (jcp_.reduce_mode == Reduce::Mean) + if (jcp_.reduce_mode == ReduceL2 || jcp_.reduce_mode == ReduceMean || + jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) { + if (jcp_.reduce_mode == ReduceMean) uni_vbroadcastss(xmm_aux, ptr[reg_divisor]); Xbyak::Label reduce_loop_label; @@ -1012,20 +1013,20 @@ private: } inline void reduce_map_kernel(Vmm vmm_dst) { - if (jcp_.reduce_mode == Reduce::Mean) + if (jcp_.reduce_mode == ReduceMean) uni_vdivps(vmm_dst, vmm_dst, vmm_aux); - else if (jcp_.reduce_mode == Reduce::L2) + else if (jcp_.reduce_mode == ReduceL2) uni_vsqrtps(vmm_dst, vmm_dst); - else if (jcp_.reduce_mode == Reduce::LogSum || jcp_.reduce_mode == Reduce::LogSumExp) + else if (jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) log_injector->compute_vector_range(vmm_dst.getIdx(), vmm_dst.getIdx() + 1); } inline void reduce_map_kernel_scalar(Xmm xmm_dst) { - if (jcp_.reduce_mode == Reduce::Mean) + if (jcp_.reduce_mode == ReduceMean) uni_vdivps(xmm_dst, xmm_dst, xmm_aux); - else if (jcp_.reduce_mode == Reduce::L2) 
+ else if (jcp_.reduce_mode == ReduceL2) uni_vsqrtps(xmm_dst, xmm_dst); - else if (jcp_.reduce_mode == Reduce::LogSum || jcp_.reduce_mode == Reduce::LogSumExp) + else if (jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) log_injector->compute_vector_range(xmm_dst.getIdx(), xmm_dst.getIdx() + 1); } @@ -1289,28 +1290,28 @@ private: inline void horiz_ps(const Xmm& xmm, const Operand& op) { switch (jcp_.reduce_mode) { - case Reduce::And: + case ReduceAnd: andps(xmm, op); break; - case Reduce::L1: - case Reduce::L2: - case Reduce::LogSum: - case Reduce::Mean: - case Reduce::Sum: - case Reduce::SumSquare: - case Reduce::LogSumExp: + case ReduceL1: + case ReduceL2: + case ReduceLogSum: + case ReduceMean: + case ReduceSum: + case ReduceSumSquare: + case ReduceLogSumExp: addps(xmm, op); break; - case Reduce::Max: + case ReduceMax: maxps(xmm, op); break; - case Reduce::Min: + case ReduceMin: minps(xmm, op); break; - case Reduce::Or: + case ReduceOr: orps(xmm, op); break; - case Reduce::Prod: + case ReduceProd: mulps(xmm, op); break; default: @@ -1319,51 +1320,96 @@ private: } }; -MKLDNNReduceNode::MKLDNNReduceNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +std::map&, MKLDNNReduceNode&)>> MKLDNNReduceNode::initializers = { + {ngraph::opset4::ReduceL1::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceL1; + }}, + {ngraph::opset4::ReduceL2::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceL2; + }}, + {ngraph::opset1::ReduceLogicalAnd::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceAnd; + }}, + {ngraph::opset1::ReduceLogicalOr::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceOr; + }}, + {ngraph::opset1::ReduceMax::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceMax; + }}, + {ngraph::opset1::ReduceMean::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceMean; + }}, + {ngraph::opset1::ReduceMin::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceMin; + }}, + {ngraph::opset1::ReduceProd::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceProd; + }}, + {ngraph::opset1::ReduceSum::type_info, [](const std::shared_ptr& op, MKLDNNReduceNode& node) { + node.algorithm = ReduceSum; + }} +}; + +bool MKLDNNReduceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (std::dynamic_pointer_cast(op) == nullptr && + std::dynamic_pointer_cast(op) == nullptr) { + errorMessage = "Reduce node with name " + op->get_friendly_name() + " is not derived from ArithmeticReductionKeepDims or LogicalReductionKeepDims"; + return false; + } + if (initializers.find(op->get_type_info()) == initializers.end()) { + errorMessage = "Doesn't support Reduce algorithm: " + std::string(op->get_type_info().name); + return false; + } + if (std::dynamic_pointer_cast(op->get_input_node_shared_ptr(REDUCE_INDEXES)) == nullptr) { + errorMessage = "Only const 'reduce_indexes' input is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNReduceNode::MKLDNNReduceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Reduce node with name '" + getName() + "'"; + initializers[op->get_type_info()](op, *this); + if (const auto reduce = std::dynamic_pointer_cast(op)) { + keep_dims = reduce->get_keep_dims(); + } else if (const auto reduce = std::dynamic_pointer_cast(op)) { + keep_dims = reduce->get_keep_dims(); + } + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNReduceNode::getSupportedDescriptors() { if (!descs.empty()) return; if (getParentEdges().size() != 2) - IE_THROW() << "Reduce layer with name " << getName() << " gets incorrect number of input edges!"; + IE_THROW() << errorPrefix << " gets incorrect number of input edges!"; if (getChildEdges().empty()) - IE_THROW() << "Reduce layer with name " << getName() << " gets incorrect number of output edges!"; + IE_THROW() << errorPrefix << " gets incorrect number of output edges!"; if (getParentEdgeAt(REDUCE_INDEXES)->getDims().ndims() != 1) { - IE_THROW() << "Reduce layer with name " << getName() << " gets incorrect index vector dimension! Index vector should be 1 dimension."; + IE_THROW() << errorPrefix << " gets incorrect index vector dimension! Index vector should be 1 dimension."; } - auto *layer = getCnnLayer().get(); - keep_dims = layer->GetParamAsBool("keep_dims", false); - if (keep_dims) { if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() != getChildEdgeAt(0)->getDims().ndims()) - IE_THROW() << "Reduce layer with name " << getName() << "gets incorrect number of input/output dimensions!"; + IE_THROW() << errorPrefix << " gets incorrect number of input/output dimensions!"; } else { // In fact, after the Reduce operation, the shape must be a scalar if the previous one was 1d. // But for now, 0d tensor (scalar) is emulated as 1d tensor. Skip checking in such cases. 
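        // Illustrative case (hypothetical shapes, continuing the note above): reducing a 1d
        // input of shape {4} over axis 0 with keep_dims == false should produce a 0d scalar,
        // but that scalar is emulated as a 1d tensor of shape {1}; input and output then both
        // report ndims() == 1, so the rank-must-shrink check below is skipped for this case.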
bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 1 && getChildEdgeAt(0)->getDims().ndims() == 1; if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= getChildEdgeAt(0)->getDims().ndims() && !is_emulated_0d_as_1d) - IE_THROW() << "Reduce layer with name " << getName() << "gets incorrect number of input/output dimensions!"; + IE_THROW() << errorPrefix << "gets incorrect number of input/output dimensions!"; } - - Type reduce_mode = getType(); - if (reduce_mode == ReduceAnd) reduceMode = Reduce::And; - else if (reduce_mode == ReduceL1) reduceMode = Reduce::L1; - else if (reduce_mode == ReduceL2) reduceMode = Reduce::L2; - else if (reduce_mode == ReduceLogSum) reduceMode = Reduce::LogSum; - else if (reduce_mode == ReduceLogSumExp) reduceMode = Reduce::LogSumExp; - else if (reduce_mode == ReduceMax) reduceMode = Reduce::Max; - else if (reduce_mode == ReduceMean) reduceMode = Reduce::Mean; - else if (reduce_mode == ReduceMin) reduceMode = Reduce::Min; - else if (reduce_mode == ReduceOr) reduceMode = Reduce::Or; - else if (reduce_mode == ReduceProd) reduceMode = Reduce::Prod; - else if (reduce_mode == ReduceSum) reduceMode = Reduce::Sum; - else if (reduce_mode == ReduceSumSquare) reduceMode = Reduce::SumSquare; - else - IE_THROW() << "Reduce layer with name " << getName() << " gets unsupported Reduce layer type!"; } void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { @@ -1378,8 +1424,8 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { Precision::U8 }; - Precision inputPrecision = getCnnLayer()->insData[REDUCE_DATA].lock()->getPrecision(); - Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision(); + Precision inputPrecision = getOriginalInputPrecisionAtPort(REDUCE_DATA); + Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); jit_mode = (mayiuse(cpu::x64::sse41)) && getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= 5 && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), inputPrecision) != std::end(supportedPrecisions) && @@ -1391,8 +1437,8 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { if (Precision::BF16 == outputPrecision) { if (!mayiuse(avx512_core)) { outputPrecision = Precision::FP32; - } else if (reduceMode != Reduce::And && reduceMode != Reduce::Or && - reduceMode != Reduce::Max && reduceMode != Reduce::Min) { + } else if (algorithm != ReduceAnd && algorithm != ReduceOr && + algorithm != ReduceMin && algorithm != ReduceMax) { outputPrecision = Precision::FP32; } } @@ -1462,11 +1508,11 @@ void MKLDNNReduceNode::createPrimitive() { auto &srcDataMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); auto &srcIndexesMemPtr = getParentEdgeAt(REDUCE_INDEXES)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) - IE_THROW() << "Reduce layer with name " << getName() << "didn't allocate destination memory."; + IE_THROW() << errorPrefix << " has not allocated destination memory."; if (!srcDataMemPtr || !srcDataMemPtr->GetPrimitivePtr() || !srcIndexesMemPtr || !srcIndexesMemPtr->GetPrimitivePtr()) - IE_THROW() << "Reduce layer with name " << getName() << "didn't allocate input memory."; + IE_THROW() << errorPrefix << " has not allocate input memory."; if (getSelectedPrimitiveDescriptor() == nullptr) - IE_THROW() << "Reduce layer with name " << getName() << "didn't set preferable primitive descriptor."; + IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor"; auto selectedPD = getSelectedPrimitiveDescriptor(); planar_layout = 
getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().isPlainFormat(); @@ -1477,7 +1523,7 @@ void MKLDNNReduceNode::createPrimitive() { jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.planar_layout = planar_layout; - jcp.reduce_mode = reduceMode; + jcp.reduce_mode = getAlgorithm(); if (mayiuse(cpu::x64::avx512_common)) { reduce_kernel.reset(new jit_uni_reduce_kernel_f32(jcp)); @@ -1549,7 +1595,7 @@ void MKLDNNReduceNode::execute(mkldnn::stream strm) { auto out_ptr = reinterpret_cast(dst_data); reduce_ref(in_ptr, out_ptr); } else { - IE_THROW() << "Reduce layer with name " << getName() << "only supports plain layout on machine w/o sse42."; + IE_THROW() << errorPrefix << " supports only plain layout on machine w/o sse42."; } } } @@ -1560,8 +1606,8 @@ void MKLDNNReduceNode::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size if (planar_layout) { reduce_PLN(in_ptr, out_ptr); } else { - if ((reduceMode == Reduce::And || reduceMode == Reduce::LogSumExp || reduceMode == Reduce::Max || - reduceMode == Reduce::Min || reduceMode == Reduce::Prod) && ReduceC) { + if ((algorithm == ReduceAnd || algorithm == ReduceLogSumExp || algorithm == ReduceMax || + algorithm == ReduceMin || algorithm == ReduceProd) && ReduceC) { reduce_BLK_concern_padding(in_ptr, out_ptr); } else { reduce_BLK(in_ptr, out_ptr); @@ -1802,19 +1848,19 @@ inline void MKLDNNReduceNode::reduce_kernel_post_process(uint8_t *out_ptr) { } inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { - switch (reduceMode) { - case Reduce::L1: - case Reduce::L2: - case Reduce::LogSum: - case Reduce::LogSumExp: - case Reduce::Mean: - case Reduce::Or: - case Reduce::Sum: - case Reduce::SumSquare: + switch (algorithm) { + case ReduceL1: + case ReduceL2: + case ReduceLogSum: + case ReduceLogSumExp: + case ReduceMean: + case ReduceOr: + case ReduceSum: + case ReduceSumSquare: memset(out_ptr, 0, dst_size); break; - case Reduce::And: - case Reduce::Prod: + case ReduceAnd: + case ReduceProd: if (output_prec == Precision::FP32) { auto out_p = reinterpret_cast(out_ptr); parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast(1); }); @@ -1832,7 +1878,7 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast(1); }); } break; - case Reduce::Max: + case ReduceMax: if (output_prec == Precision::FP32) { auto out_p = reinterpret_cast(out_ptr); parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits::min(); }); @@ -1850,7 +1896,7 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits::min(); }); } break; - case Reduce::Min: + case ReduceMin: if (output_prec == Precision::FP32) { auto out_p = reinterpret_cast(out_ptr); parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits::max(); }); @@ -1869,7 +1915,7 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { } break; default: - IE_THROW() << "Reduce layer with name " << getName() << "gets unsupported reduce mode."; + IE_THROW() << errorPrefix << " gets unsupported reduce mode."; } } @@ -1882,7 +1928,7 @@ inline void MKLDNNReduceNode::calc_process_dst_dims(const int32_t *idx_data) { if (axis < 0) axis += src_dims.size(); if (static_cast(axis) > src_dims.size()) - 
IE_THROW() << "Reduce layer with name " << getName() << "exceeds data tensor dimension on index to reduce"; + IE_THROW() << errorPrefix << " exceeds data tensor dimension on index to reduce"; axes.insert(static_cast(axis)); } for (size_t i = 0; i < src_dims.size(); i++) { @@ -1904,52 +1950,52 @@ inline void MKLDNNReduceNode::calc_process_dst_dims(const int32_t *idx_data) { } for (size_t i = 0; i < std::min(out_dims.size(), dst_dims.size()); i++) { if (out_dims[i] != dst_dims[i]) - IE_THROW() << "Reduce layer with name " << getName() << "gets incorrect number of output dimensions!"; + IE_THROW() << errorPrefix << "gets incorrect number of output dimensions!"; } } inline void MKLDNNReduceNode::reduce_ref(const float *in_ptr, float *out_ptr) { - switch (reduceMode) { - case Reduce::And: + switch (algorithm) { + case ReduceAnd: reduce_ref_process(in_ptr, out_ptr, 1, [](float x, float y)->float { return x && y; }); break; - case Reduce::L1: + case ReduceL1: reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + (y >= 0 ? y : -y); }); break; - case Reduce::L2: + case ReduceL2: reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + y * y; }); break; - case Reduce::LogSum: + case ReduceLogSum: reduce_ref_process(in_ptr, out_ptr, 0, [](float x, float y)->float { return x + y; }); break; - case Reduce::LogSumExp: + case ReduceLogSumExp: reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + expf(y); }); break; - case Reduce::Max: + case ReduceMax: reduce_ref_process(in_ptr, out_ptr, std::numeric_limits::min(), [](float x, float y)->float { return x > y ? x : y; }); break; - case Reduce::Mean: + case ReduceMean: reduce_ref_process(in_ptr, out_ptr, 0, [](float x, float y)->float { return x + y; }); break; - case Reduce::Min: + case ReduceMin: reduce_ref_process(in_ptr, out_ptr, std::numeric_limits::max(), [](float x, float y)->float { return x < y ? 
x : y; }); break; - case Reduce::Or: + case ReduceOr: reduce_ref_process(in_ptr, out_ptr, 0, [](float x, float y)->float { return x || y; }); break; - case Reduce::Prod: + case ReduceProd: reduce_ref_process(in_ptr, out_ptr, 1, [](float x, float y)->float { return x * y; }); break; - case Reduce::Sum: + case ReduceSum: reduce_ref_process(in_ptr, out_ptr, 0, [](float x, float y)->float { return x + y; }); break; - case Reduce::SumSquare: + case ReduceSumSquare: reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + y * y; }); break; default: - IE_THROW() << "Reduce layer with name " << getName() << "gets unsupported reduce mode."; + IE_THROW() << errorPrefix << "gets unsupported reduce mode."; } } @@ -2008,53 +2054,39 @@ void MKLDNNReduceNode::reduce_ref_process(const float *in_ptr, float *out_ptr, f } inline void MKLDNNReduceNode::reduce_ref_map(float *out_ptr, size_t work_amount_dst, size_t reduced_dims_work_amount) { - switch (reduceMode) { - case Reduce::And: - case Reduce::L1: - case Reduce::Max: - case Reduce::Min: - case Reduce::Or: - case Reduce::Prod: - case Reduce::Sum: - case Reduce::SumSquare: + switch (algorithm) { + case ReduceAnd: + case ReduceL1: + case ReduceMax: + case ReduceMin: + case ReduceOr: + case ReduceProd: + case ReduceSum: + case ReduceSumSquare: break; - case Reduce::L2: + case ReduceL2: parallel_for(work_amount_dst, [&](size_t i) { out_ptr[i] = std::sqrt(out_ptr[i]); }); break; - case Reduce::LogSum: - case Reduce::LogSumExp: + case ReduceLogSum: + case ReduceLogSumExp: parallel_for(work_amount_dst, [&](size_t i) { out_ptr[i] = logf(out_ptr[i]); }); break; - case Reduce::Mean: + case ReduceMean: parallel_for(work_amount_dst, [&](size_t i) { out_ptr[i] /= reduced_dims_work_amount; }); break; default: - IE_THROW() << "Reduce layer with name " << getName() << "gets unsupported reduce mode."; + IE_THROW() << errorPrefix << "gets unsupported reduce mode."; } } bool MKLDNNReduceNode::created() const { - return getType() == ReduceAnd || getType() == ReduceL1 || getType() == ReduceL2 || - getType() == ReduceLogSum || getType() == ReduceLogSumExp || getType() == ReduceMax || - getType() == ReduceMean || getType() == ReduceMin || getType() == ReduceOr || - getType() == ReduceProd || getType() == ReduceSum || getType() == ReduceSumSquare; + return getType() == Reduce; } -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceAnd); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceL1); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceL2); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceLogSum); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceLogSumExp); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceMax); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceMean); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceMin); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceOr); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceProd); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceSum); -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, ReduceSumSquare); +REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, Reduce); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h index 1005cf58a04..f0e386567c2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h @@ -12,24 +12,9 @@ namespace MKLDNNPlugin { -enum class Reduce { - And, - L1, - L2, - LogSum, - LogSumExp, - Max, - Mean, - Min, - Or, - Prod, - Sum, - SumSquare -}; - struct jit_reduce_config_params { bool 
planar_layout; - Reduce reduce_mode; + Algorithm reduce_mode; mkldnn::memory::data_type src_dt; mkldnn::memory::data_type dst_dt; int src_data_size; @@ -79,7 +64,7 @@ struct jit_uni_reduce_post_kernel { class MKLDNNReduceNode : public MKLDNNNode { public: - MKLDNNReduceNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNReduceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNReduceNode() override = default; void getSupportedDescriptors() override; @@ -91,6 +76,8 @@ public: return false; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: void reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size_t dst_size); void reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr); @@ -104,11 +91,10 @@ private: void reduce_ref_process(const float *in_ptr, float *out_ptr, float init_value, std::function func); inline void reduce_ref_map(float *out_ptr, size_t work_amount_dst, size_t reduced_dims_work_amount); - Reduce reduceMode = Reduce::Sum; size_t blk_size; size_t dims_size; - const size_t REDUCE_DATA = 0; - const size_t REDUCE_INDEXES = 1; + static const size_t REDUCE_DATA = 0; + static const size_t REDUCE_INDEXES = 1; bool planar_layout = true; bool jit_mode = true; bool keep_dims = true; @@ -124,6 +110,10 @@ private: std::shared_ptr reduce_kernel; std::shared_ptr reduce_post_kernel; + + static std::map& op, MKLDNNReduceNode& node)>> initializers; + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp new file mode 100644 index 00000000000..ba314cebea5 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_reference_node.h" +#include +#include +#include + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using namespace InferenceEngine::details; + +MKLDNNReferenceNode::MKLDNNReferenceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache, + const std::string& errorMessage) : + MKLDNNNode(op, eng, cache), ngraphOp(op), additionalErrorMessage(errorMessage) { + setType(Reference); + setTypeStr("Reference"); +} + +void MKLDNNReferenceNode::getSupportedDescriptors() {} + +void MKLDNNReferenceNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + InferenceEngine::LayerConfig config; + for (size_t i = 0; i < inDims.size(); i++) { + InferenceEngine::DataConfig dataConfig; + dataConfig.inPlace = -1; + dataConfig.constant = false; + + dataConfig.desc = MKLDNNMemoryDesc(inDims[i], + MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_input_element_type(i))), + MKLDNNMemory::GetPlainFormat(inDims[i])); + + config.inConfs.push_back(dataConfig); + } + + for (size_t i = 0; i < outDims.size(); i++) { + InferenceEngine::DataConfig dataConfig; + dataConfig.inPlace = -1; + dataConfig.constant = false; + + dataConfig.desc = MKLDNNMemoryDesc(outDims[i], + MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_output_element_type(i))), + MKLDNNMemory::GetPlainFormat(outDims[i])); + + config.outConfs.push_back(dataConfig); + } + + supportedPrimitiveDescriptors.push_back({config, 
impl_desc_type::ref, memory::format_tag::undef}); +} + +void MKLDNNReferenceNode::createPrimitive() {} + +void MKLDNNReferenceNode::execute(mkldnn::stream strm) { + ngraph::HostTensorVector inputs; + for (size_t i = 0; i < inDims.size(); i++) { + void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); + inputs.push_back(std::make_shared(ngraphOp->get_input_element_type(i), ngraphOp->get_input_shape(i), srcDataPtr)); + } + + ngraph::HostTensorVector outputs; + for (size_t i = 0; i < outDims.size(); i++) { + void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetPtr(); + outputs.push_back(std::make_shared(ngraphOp->get_output_element_type(i), ngraphOp->get_output_shape(i), dstDataPtr)); + } + + if (!ngraphOp->evaluate(outputs, inputs)) { + std::string errorDetails = "Unsupported operation of type: " + std::string(ngraphOp->get_type_name()) + + " name: " + std::string(ngraphOp->get_friendly_name()); + errorDetails += "\nDetails: \n"; + if (!additionalErrorMessage.empty()) { + errorDetails += additionalErrorMessage + "\n"; + } + errorDetails += "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented)"; + IE_THROW(NotImplemented) << errorDetails; + } +} + +bool MKLDNNReferenceNode::created() const { + return getType() == Reference; +} diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h new file mode 100644 index 00000000000..ed78ffe14bd --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h @@ -0,0 +1,30 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +//#include +#include +//#include + +namespace MKLDNNPlugin { + +class MKLDNNReferenceNode : public MKLDNNNode { +public: + MKLDNNReferenceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache, const std::string& errorMessage); + ~MKLDNNReferenceNode() override = default; + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override; + void execute(mkldnn::stream strm) override; + bool created() const override; + +private: + const std::shared_ptr ngraphOp; + const std::string additionalErrorMessage; +}; + +} // namespace MKLDNNPlugin + diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp new file mode 100644 index 00000000000..d530c051a64 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp @@ -0,0 +1,422 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "mkldnn_region_yolo_node.h" +#include +#include +#include "common/cpu_convert.h" +#include +#include +#include +#include "utils/bfloat16.hpp" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using namespace mkldnn::impl::cpu; +using namespace mkldnn::impl::cpu::x64; +using namespace mkldnn::impl::utils; + +#define GET_OFF(field) offsetof(jit_args_logistic, field) + +template +struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_generator { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_logistic_kernel_f32) + + jit_uni_logistic_kernel_f32(jit_logistic_config_params jcp) : jcp_(jcp), jit_uni_logistic_kernel(), jit_generator() {} + + void create_ker() override { + 
jit_generator::create_kernel(); + ker_ = (decltype(ker_))jit_ker(); + } + + void generate() override { + exp_injector.reset(new jit_uni_eltwise_injector_f32(this, mkldnn::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f)); + + if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) + emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa, nullptr)); + + this->preamble(); + + mov(reg_src, ptr[reg_params + GET_OFF(src)]); + mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); + mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); + mov(reg_table, l_table); + + Xbyak::Label main_loop_label; + Xbyak::Label tail_loop_label; + Xbyak::Label exit_label; + + int step = vlen / sizeof(float); + L(main_loop_label); { + cmp(reg_work_amount, step); + jl(tail_loop_label, T_NEAR); + + load_vector(vmm_src, ptr[reg_src], jcp_.src_dt); + compute_kernel(); + store_vector(ptr[reg_dst], vmm_src, jcp_.dst_dt); + + add(reg_src, step * jcp_.src_data_size); + add(reg_dst, step * jcp_.dst_data_size); + sub(reg_work_amount, step); + + jmp(main_loop_label, T_NEAR); + } + + step = 1; + L(tail_loop_label); { + cmp(reg_work_amount, step); + jl(exit_label, T_NEAR); + + load_scalar(xmm_src, ptr[reg_src], jcp_.src_dt); + compute_kernel(); + store_scalar(ptr[reg_dst], xmm_src, jcp_.dst_dt); + + add(reg_src, step * jcp_.src_data_size); + add(reg_dst, step * jcp_.dst_data_size); + sub(reg_work_amount, step); + + jmp(tail_loop_label, T_NEAR); + } + + L(exit_label); + + this->postamble(); + + if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) + emu_vcvtneps2bf16->emit_data(); + + exp_injector->prepare_table(); + + prepare_table(); + } + +private: + using Vmm = typename conditional3::type; + size_t vlen = cpu_isa_traits::vlen; + + Xbyak::Address table_val(int index) { return ptr[reg_table + index * vlen]; } + + Xbyak::Reg64 reg_src = r8; + Xbyak::Reg64 reg_dst = r9; + Xbyak::Reg64 reg_table = r10; + Xbyak::Reg64 reg_work_amount = r11; + Xbyak::Reg64 reg_params = abi_param1; + + Vmm vmm_aux0 = Vmm(0); + Vmm vmm_src = Vmm(1); + Xbyak::Xmm xmm_src = Xbyak::Xmm(1); + Vmm vmm_aux1 = Vmm(2); + Vmm vmm_aux2 = Vmm(3); + + const Xbyak::Opmask k_mask = Xbyak::Opmask(1); + + std::unique_ptr emu_vcvtneps2bf16; + + Xbyak::Label l_table; + + std::shared_ptr> exp_injector; + + jit_logistic_config_params jcp_; + + void compute_kernel() { + uni_vmovups(vmm_aux0, vmm_src); + uni_vandps(vmm_aux0, vmm_aux0, table_val(0)); + uni_vorps(vmm_src, vmm_src, table_val(0)); + + exp_injector->compute_vector_range(vmm_src.getIdx(), vmm_src.getIdx() + 1); + + uni_vmovups(vmm_aux1, vmm_src); + uni_vaddps(vmm_aux1, vmm_aux1, table_val(1)); + uni_vdivps(vmm_src, vmm_src, vmm_aux1); + + uni_vmovups(vmm_aux2, table_val(1)); + uni_vsubps(vmm_aux2, vmm_aux2, vmm_src); + + if (isa == x64::sse41) { + uni_vblendvps(vmm_aux2, vmm_aux2, vmm_src, vmm_aux0); + uni_vmovups(vmm_src, vmm_aux2); + } else if (isa == x64::avx2) { + uni_vblendvps(vmm_src, vmm_aux2, vmm_src, vmm_aux0); + } else { + vptestmd(k_mask, vmm_aux0, vmm_aux0); + vblendmps(vmm_src | k_mask, vmm_aux2, vmm_src); + } + } + + void prepare_table() { + auto broadcast_int = [&](int val) { + for (size_t d = 0; d < vlen / sizeof(float); ++d) { + dd(val); + } + }; + + align(64); + L(l_table); + + broadcast_int(vals_for_logistic_activate.mask_sign); + broadcast_int(vals_for_logistic_activate.float_1); + } + + const struct vals_for_logistic_activate_type { + int mask_sign = 0x80000000; // 0 // mask to extract sign + int float_1 = 0x3f800000; // 1 // 1.0f + } vals_for_logistic_activate; + + inline void 
load_vector(Vmm vmm_src, const Xbyak::Address &op, InferenceEngine::Precision src_dt) { + switch (src_dt) { + case InferenceEngine::Precision::FP32: + uni_vmovups(vmm_src, op); + break; + case InferenceEngine::Precision::BF16: + vpmovzxwd(vmm_src, op); + uni_vpslld(vmm_src, vmm_src, 16); + break; + default: + assert(!"unknown src_dt"); + } + } + inline void store_vector(const Xbyak::Address &op, Vmm vmm_dst, InferenceEngine::Precision dst_dt) { + Xbyak::Ymm ymm_dst = Xbyak::Ymm(vmm_dst.getIdx()); + + switch (dst_dt) { + case InferenceEngine::Precision::FP32: + uni_vmovups(op, vmm_dst); + break; + case InferenceEngine::Precision::BF16: + if (mayiuse(avx512_core_bf16)) + vcvtneps2bf16(ymm_dst, vmm_dst); + else + emu_vcvtneps2bf16->emit_code({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); + vmovdqu16(op, ymm_dst); + break; + default: + assert(!"unknown dst_dt"); + } + } + inline void load_scalar(Xbyak::Xmm xmm_src, const Xbyak::Address &op, InferenceEngine::Precision src_dt) { + switch (src_dt) { + case InferenceEngine::Precision::FP32: + movss(xmm_src, op); + break; + case InferenceEngine::Precision::BF16: + pinsrw(xmm_src, op, 0x0); + uni_vpslld(xmm_src, xmm_src, 16); + break; + default: + assert(!"unknown src_dt"); + } + } + inline void store_scalar(const Xbyak::Address &op, Xbyak::Xmm xmm_dst, InferenceEngine::Precision dst_dt) { + switch (dst_dt) { + case InferenceEngine::Precision::FP32: + movss(op, xmm_dst); + break; + case InferenceEngine::Precision::BF16: + uni_vpsrld(xmm_dst, xmm_dst, 16); + pextrw(op, xmm_dst, 0x0); + break; + default: + assert(!"unknown dst_dt"); + } + } +}; + +bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto regionYolo = std::dynamic_pointer_cast(op); + if (!regionYolo) { + errorMessage = "Only opset1 RegionYolo operation is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNRegionYoloNode::MKLDNNRegionYoloNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = std::string(op->get_type_name()) + " node with name '" + op->get_friendly_name() + "'"; + if (op->get_input_size() != 1 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + const auto regionYolo = std::dynamic_pointer_cast(op); + classes = regionYolo->get_num_classes(); + coords = regionYolo->get_num_coords(); + num = regionYolo->get_num_regions(); + do_softmax = regionYolo->get_do_softmax(); + mask = regionYolo->get_mask(); +} + +void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + input_prec = getOriginalInputPrecisionAtPort(0); + output_prec = getOriginalOutputPrecisionAtPort(0); + + if (input_prec != Precision::FP32 && input_prec != Precision::BF16) { + input_prec = Precision::FP32; + } + + if (output_prec != Precision::FP32 && output_prec != Precision::BF16) { + output_prec = Precision::FP32; + } + + if (Precision::BF16 == output_prec) { + if (!mayiuse(avx512_core)) { + output_prec = Precision::FP32; + } + } + + impl_desc_type impl_type; + if (mayiuse(x64::avx512_common)) { + impl_type = impl_desc_type::jit_avx512; + } else if (mayiuse(x64::avx2)) { + impl_type = impl_desc_type::jit_avx2; + } else if (mayiuse(x64::sse41)) { + impl_type = impl_desc_type::jit_sse42; + } else { + impl_type = impl_desc_type::ref; + } + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_prec}}, + {{TensorDescCreatorTypes::ncsp, output_prec}}, + impl_type); +} + +void MKLDNNRegionYoloNode::createPrimitive() { + jit_logistic_config_params jcp; + jcp.src_dt = jcp.dst_dt = output_prec; + jcp.src_data_size = jcp.dst_data_size = output_prec.size(); + + block_size = 1; + if (mayiuse(x64::avx512_common)) { + logistic_kernel.reset(new jit_uni_logistic_kernel_f32(jcp)); + block_size = 16; + } else if (mayiuse(x64::avx2)) { + logistic_kernel.reset(new jit_uni_logistic_kernel_f32(jcp)); + block_size = 8; + } else if (mayiuse(x64::sse41)) { + logistic_kernel.reset(new jit_uni_logistic_kernel_f32(jcp)); + block_size = 4; + } + + softmax_kernel = std::make_shared(input_prec, output_prec); + + if (logistic_kernel) + logistic_kernel->create_ker(); +} + +inline float MKLDNNRegionYoloNode::logistic_scalar(float src) { + U aux2; + aux2.as_float_value = src; + int sign = aux2.as_int_value >> 31; + if (sign == 0) + src *= -1; + + src = std::exp(src); + + src = src / (src + 1); + if (sign == 0) + src = 1 - src; + + return src; +} + +inline void MKLDNNRegionYoloNode::calculate_logistic(size_t start_index, int count, uint8_t * dst_data) { + auto dst_data_size = output_prec.size(); + if (logistic_kernel) { + int blocks_num = div_up(count, block_size); + parallel_for(blocks_num, [&](int ib) { + int idx = ib * block_size; + int work_amount = std::min(count - idx, block_size); + + auto arg = jit_args_logistic(); + arg.src = arg.dst = dst_data + dst_data_size * (start_index + idx); + arg.work_amount = static_cast(work_amount); + + (*logistic_kernel)(&arg); + }); + } else { + if (Precision::FP32 == output_prec) { + auto float_dst_data = reinterpret_cast(dst_data); + for (int i = 0; i < count; i++) { + float_dst_data[i + start_index] = 
logistic_scalar(float_dst_data[i + start_index]); + } + } else if (Precision::BF16 == output_prec) { + auto bf16_dst_data = reinterpret_cast(dst_data); + for (int i = 0; i < count; i++) { + bf16_dst_data[i + start_index] = logistic_scalar(bf16_dst_data[i + start_index]); + } + } else { + IE_THROW() << "Unsupported precision configuration outPrc=" << output_prec.name(); + } + } +} + +void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { + auto inputDesc = getParentEdgeAt(0)->getDesc(); + auto outputDesc = getChildEdgeAt(0)->getDesc(); + size_t mask_size = mask.size(); + + size_t IW = (inputDesc.getDims().size() > 3) ? inputDesc.getDims()[3] : 1; + size_t IH = (inputDesc.getDims().size() > 2) ? inputDesc.getDims()[2] : 1; + size_t IC = (inputDesc.getDims().size() > 1) ? inputDesc.getDims()[1] : 1; + size_t B = (inputDesc.getDims().size() > 0) ? inputDesc.getDims()[0] : 1; + + int end_index = 0; + int num_ = 0; + if (do_softmax) { + // Region layer (Yolo v2) + end_index = IW * IH; + num_ = num; + } else { + // Yolo layer (Yolo v3) + end_index = IW * IH * (classes + 1); + num_ = mask_size; + } + size_t inputs_size = IH * IW * num_ * (classes + coords + 1); + size_t total_size = 2 * IH * IW; + + const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + cpu_convert(src_data, dst_data, inputDesc.getPrecision(), outputDesc.getPrecision(), B * IC * IH * IW); + + for (int b = 0; b < B; b++) { + for (int n = 0; n < num_; n++) { + size_t index = b * inputs_size + n * IW * IH * (classes + coords + 1); + calculate_logistic(index, total_size, dst_data); + + index = b * inputs_size + IW * IH * (n * (classes + coords + 1) + coords); + calculate_logistic(index, end_index, dst_data); + } + } + + if (do_softmax) { + int index = IW * IH * (coords + 1); + int batch_offset = inputs_size / num; + for (int b = 0; b < B * num; b++) { + softmax_kernel->execute(src_data + input_prec.size() * (index + b * batch_offset), + dst_data + output_prec.size() * (index + b * batch_offset), 1, classes, IH, IW); + } + } +} + +bool MKLDNNRegionYoloNode::created() const { + return getType() == RegionYolo; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNRegionYoloNode, RegionYolo) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.h new file mode 100644 index 00000000000..c4c4f525e80 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.h @@ -0,0 +1,76 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +struct jit_args_logistic { + const void* src; + void* dst; + size_t work_amount; +}; + +struct jit_logistic_config_params { + InferenceEngine::Precision src_dt; + InferenceEngine::Precision dst_dt; + unsigned src_data_size = 0; + unsigned dst_data_size = 0; +}; + +struct jit_uni_logistic_kernel { + void (*ker_)(const jit_args_logistic *); + + void operator()(const jit_args_logistic *args) { assert(ker_); ker_(args); } + + virtual void create_ker() = 0; + + jit_uni_logistic_kernel() : ker_(nullptr) {} + virtual ~jit_uni_logistic_kernel() {} +}; + +class MKLDNNRegionYoloNode : public MKLDNNNode { +public: + MKLDNNRegionYoloNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNRegionYoloNode() override = default; + 
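+    // MKLDNNNode interface overrides: descriptor setup, primitive creation and execution entry points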
+ void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + int classes; + int coords; + int num; + float do_softmax; + std::vector mask; + InferenceEngine::Precision input_prec, output_prec; + + std::string errorPrefix; + + int block_size; + std::shared_ptr logistic_kernel; + std::shared_ptr softmax_kernel; + + union U { + float as_float_value; + int as_int_value; + }; + + inline float logistic_scalar(float src); + inline void calculate_logistic(size_t start_index, int count, uint8_t * dst_data); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index 3cbe7ef2aee..865513ec393 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -16,10 +16,12 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNReorderNode::MKLDNNReorderNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : - MKLDNNNode(layer, eng, w_cache) { -} +MKLDNNReorderNode::MKLDNNReorderNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : + MKLDNNNode(op, eng, w_cache) {} +MKLDNNReorderNode::MKLDNNReorderNode(const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : + MKLDNNNode("Reorder", name, eng, w_cache) { +} void MKLDNNReorderNode::getSupportedDescriptors() { if (outDims.empty() && output.getLayout() != InferenceEngine::Layout::ANY) outDims.push_back(MKLDNNDims(output.getDims())); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h index 41fc7279781..85112c36875 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h @@ -14,7 +14,10 @@ namespace MKLDNNPlugin { class MKLDNNReorderNode : public MKLDNNNode { public: - MKLDNNReorderNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + // TODO [NM]: do we need ngraph::Node based ctor at all? 
+ MKLDNNReorderNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNReorderNode(const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNReorderNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp index 4893546499a..543e0a86bcb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp @@ -3,7 +3,6 @@ // #include "mkldnn_reshape_node.h" -#include #include #include #include @@ -12,8 +11,8 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNReshapeNode::MKLDNNReshapeNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNReshapeNode::MKLDNNReshapeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) {} void MKLDNNReshapeNode::getSupportedDescriptors() { if (getParentEdges().size() != 1 && getParentEdges().size() != 2) @@ -26,9 +25,9 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - precision = getCnnLayer()->outData[0]->getPrecision(); + precision = getOriginalOutputPrecisionAtPort(0); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); // Current reshape implementation is simple memory reinterpret, @@ -63,7 +62,6 @@ void MKLDNNReshapeNode::createPrimitive() { } bool MKLDNNReshapeNode::created() const { - return getType() == Reshape || getType() == Flatten; + return getType() == Reshape; } REG_MKLDNN_PRIM_FOR(MKLDNNReshapeNode, Reshape); -REG_MKLDNN_PRIM_FOR(MKLDNNReshapeNode, Flatten); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h index b4776fd7332..12f7009b453 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNReshapeNode : public MKLDNNNode { public: - MKLDNNReshapeNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNReshapeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNReshapeNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp index 9b220b0a9a6..608df078925 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp @@ -3,78 +3,99 @@ // #include "mkldnn_rnn.h" -#include "mkldnn_extension_utils.h" - -#include "mkldnn_node.h" -#include "utils/general_utils.h" +#include #include "nodes/common/cpu_memcpy.h" -#include "utils/bfloat16.hpp" #include "nodes/common/cpu_convert.h" +#include "utils/bfloat16.hpp" +#include "mkldnn_input_node.h" +#include + 
+#include #include #include -#define THROW_ERROR IE_THROW() << NameFromType(getType()) << " layer '" << getName() << "' " - using namespace mkldnn; using namespace InferenceEngine; namespace MKLDNNPlugin { -using _RNN = RNNSequenceLayer; // alias - -static rnn_direction ie2mkl(_RNN::Direction &direction) { - return direction == _RNN::FWD ? rnn_direction::unidirectional_left2right - : direction == _RNN::BWD ? rnn_direction::unidirectional_right2left - : direction == _RNN::BDR ? rnn_direction::bidirectional_concat +static rnn_direction ieDirection2dnnl(const std::shared_ptr& op) { + ngraph::op::RecurrentSequenceDirection direction = ngraph::op::RecurrentSequenceDirection::FORWARD; + if (op->get_type_info() == ngraph::op::v5::GRUSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v0::LSTMSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v5::LSTMSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v5::RNNSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } + return direction == ngraph::op::RecurrentSequenceDirection::FORWARD ? rnn_direction::unidirectional_left2right + : direction == ngraph::op::RecurrentSequenceDirection::REVERSE ? rnn_direction::unidirectional_right2left + : direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? rnn_direction::bidirectional_concat : rnn_direction::unidirectional; } -static algorithm ie2mkl(std::string act_type) { - return act_type == "sigmoid" ? algorithm::eltwise_logistic - : act_type == "tanh" ? algorithm::eltwise_tanh - : act_type == "relu" ? algorithm::eltwise_relu - : algorithm::undef; +static mkldnn::algorithm ie2dnnl(std::string act_type) { + return act_type == "sigmoid" ? mkldnn::algorithm::eltwise_logistic + : act_type == "tanh" ? mkldnn::algorithm::eltwise_tanh + : act_type == "relu" ? mkldnn::algorithm::eltwise_relu + : mkldnn::algorithm::undef; } -static algorithm ie2mkl(RNNCellBase::CellType cell_type) { - switch (cell_type) { - case RNNCellBase::RNN: return algorithm::vanilla_rnn; - case RNNCellBase::LSTM: return algorithm::vanilla_lstm; - case RNNCellBase::GRU: return algorithm::vanilla_gru; - case RNNCellBase::GRU_LBR: return algorithm::lbr_gru; - default: - IE_THROW() << "RNN node. 
Unsupported cell type"; - return algorithm::undef; +static mkldnn::algorithm ie2dnnl(const std::shared_ptr& op) { + if (one_of(op->get_type_info(), + ngraph::op::v3::GRUCell::type_info, + ngraph::op::v5::GRUSequence::type_info)) { + auto gruCellOp = ngraph::as_type_ptr(op); + auto gruSeqOp = ngraph::as_type_ptr(op); + if ((gruCellOp && gruCellOp->get_linear_before_reset()) || + (gruSeqOp && gruSeqOp->get_linear_before_reset())) + return mkldnn::algorithm::lbr_gru; + else + return mkldnn::algorithm::vanilla_gru; + } else if (one_of(op->get_type_info(), + ngraph::op::v0::LSTMCell::type_info, + ngraph::op::v4::LSTMCell::type_info, + ngraph::op::v0::LSTMSequence::type_info, + ngraph::op::v5::LSTMSequence::type_info)) { + return mkldnn::algorithm::vanilla_lstm; + } else if (one_of(op->get_type_info(), + ngraph::op::v0::RNNCell::type_info, + ngraph::op::v5::RNNSequence::type_info)) { + return mkldnn::algorithm::vanilla_rnn; + } else { + IE_THROW() << "Unsupported cell type"; } } -size_t gatesCount(algorithm alg) { +size_t gatesCount(mkldnn::algorithm alg) { switch (alg) { - case algorithm::vanilla_rnn: return 1; - case algorithm::vanilla_gru: - case algorithm::lbr_gru: return 3; - case algorithm::vanilla_lstm: return 4; + case mkldnn::algorithm::vanilla_rnn: return 1; + case mkldnn::algorithm::vanilla_gru: + case mkldnn::algorithm::lbr_gru: return 3; + case mkldnn::algorithm::vanilla_lstm: return 4; default: - IE_THROW() << "RNN node. Unsupported cell type"; + IE_THROW() << "Unsupported cell type"; return 0; } } -size_t statesCount(algorithm alg) { +size_t statesCount(mkldnn::algorithm alg) { switch (alg) { - case algorithm::vanilla_rnn: - case algorithm::vanilla_gru: - case algorithm::lbr_gru: return 1; - case algorithm::vanilla_lstm: return 2; + case mkldnn::algorithm::vanilla_rnn: + case mkldnn::algorithm::vanilla_gru: + case mkldnn::algorithm::lbr_gru: return 1; + case mkldnn::algorithm::vanilla_lstm: return 2; default: - IE_THROW() << "RNN node. Unsupported cell type"; + IE_THROW() << "Unsupported cell type"; return 0; } } -bool haveCellState(algorithm alg) { - return alg == algorithm::vanilla_lstm; +bool haveCellState(mkldnn::algorithm alg) { + return alg == mkldnn::algorithm::vanilla_lstm; } const std::map MKLDNNRNN::weightsByLayerPrec { @@ -86,9 +107,121 @@ const std::map MKLDNNRNN // {InferenceEngine::Precision::U8, InferenceEngine::Precision::I8}, }; -MKLDNNRNN::MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) { - is_cell = one_of(layer->type, "LSTMCell", "GRUCell", "RNNCell"); +bool MKLDNNRNN::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!one_of(op->get_type_info(), + ngraph::op::v3::GRUCell::type_info, + ngraph::op::v0::LSTMCell::type_info, + ngraph::op::v4::LSTMCell::type_info, + ngraph::op::v0::RNNCell::type_info, + ngraph::op::v5::GRUSequence::type_info, + ngraph::op::v0::LSTMSequence::type_info, + ngraph::op::v5::LSTMSequence::type_info, + ngraph::op::v5::RNNSequence::type_info)) { + errorMessage = "Unsupported RNN operation."; + return false; + } + + if (one_of(op->get_type_info(), ngraph::op::v0::RNNCell::type_info, ngraph::op::v3::GRUCell::type_info)) { + if (op->get_input_size() != 5) { + errorMessage = "Node expects 5 inputs. 
Actual: " + std::to_string(op->get_input_size()); + return false; + } + if (op->get_input_node_ptr(2)->get_type_info() != ngraph::op::v0::Constant::type_info || + op->get_input_node_ptr(3)->get_type_info() != ngraph::op::v0::Constant::type_info || + op->get_input_node_ptr(4)->get_type_info() != ngraph::op::v0::Constant::type_info) { + errorMessage = "Node expects constants as W, R, B inputs."; + return false; + } + } else if (one_of(op->get_type_info(), + ngraph::op::v0::LSTMCell::type_info, + ngraph::op::v4::LSTMCell::type_info, + ngraph::op::v5::GRUSequence::type_info, + ngraph::op::v5::RNNSequence::type_info)) { + if (op->get_input_size() != 6) { + errorMessage = "Node expects 6 inputs. Actual: " + std::to_string(op->get_input_size()); + return false; + } + if (op->get_input_node_ptr(3)->get_type_info() != ngraph::op::v0::Constant::type_info || + op->get_input_node_ptr(4)->get_type_info() != ngraph::op::v0::Constant::type_info || + op->get_input_node_ptr(5)->get_type_info() != ngraph::op::v0::Constant::type_info) { + errorMessage = "Node expects constants as W, R, B inputs."; + return false; + } + } else if (one_of(op->get_type_info(), + ngraph::op::v0::LSTMSequence::type_info, + ngraph::op::v5::LSTMSequence::type_info)) { + if (op->get_input_size() != 7) { + errorMessage = "Node expects 7 inputs. Actual: " + std::to_string(op->get_input_size()); + return false; + } + if (op->get_input_node_ptr(4)->get_type_info() != ngraph::op::v0::Constant::type_info || + op->get_input_node_ptr(5)->get_type_info() != ngraph::op::v0::Constant::type_info || + op->get_input_node_ptr(6)->get_type_info() != ngraph::op::v0::Constant::type_info) { + errorMessage = "Node expects constants as W, R, B inputs."; + return false; + } + } + + auto rnnCellBase = std::dynamic_pointer_cast(op); + if (rnnCellBase && rnnCellBase->get_clip() != 0.0f) { + errorMessage = "Clipping is not supported for RNN primitive."; + return false; + } + + ngraph::op::RecurrentSequenceDirection direction = ngraph::op::RecurrentSequenceDirection::FORWARD; + if (op->get_type_info() == ngraph::op::v5::GRUSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v0::LSTMSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v5::LSTMSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v5::RNNSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } + if (!one_of(direction, ngraph::op::RecurrentSequenceDirection::FORWARD, ngraph::op::RecurrentSequenceDirection::REVERSE)) { + errorMessage = "Unsupported sequence direction."; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNRNN::MKLDNNRNN(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + is_cell = one_of(op->get_type_info(), + ngraph::op::v0::RNNCell::type_info, + ngraph::op::v3::GRUCell::type_info, + ngraph::op::v0::LSTMCell::type_info, + ngraph::op::v4::LSTMCell::type_info); + + if (one_of(op->get_type_info(), + ngraph::op::v0::RNNCell::type_info, + ngraph::op::v3::GRUCell::type_info)) { + wIdx = 2; rIdx = 3; bIdx = 4; + } else if (one_of(op->get_type_info(), + ngraph::op::v5::RNNSequence::type_info, + ngraph::op::v0::LSTMCell::type_info, + ngraph::op::v4::LSTMCell::type_info, + ngraph::op::v5::GRUSequence::type_info)) { + wIdx = 3; rIdx = 4; bIdx = 5; + } else if (one_of(op->get_type_info(), + ngraph::op::v0::LSTMSequence::type_info, + ngraph::op::v5::LSTMSequence::type_info)) { + wIdx = 4; rIdx = 5; bIdx = 6; + } + + if (is_cell) + initCell(op); + else + initSeq(op); } bool MKLDNNRNN::created() const { @@ -96,44 +229,26 @@ bool MKLDNNRNN::created() const { } void MKLDNNRNN::getSupportedDescriptors() { - runtimePrecision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (is_cell) fillCellDesc(); else fillSeqDesc(); } -void MKLDNNRNN::fillCellDesc() { - if (!descs.empty()) return; - auto cellLayer = std::dynamic_pointer_cast(getCnnLayer()); +void MKLDNNRNN::initCell(const std::shared_ptr& op) { + auto rnnCellBase = std::dynamic_pointer_cast(op); + if (!rnnCellBase) + IE_THROW() << "No original layer for RNNCell."; - if (!cellLayer) - THROW_ERROR << "No original layer for RNNCell."; + cell_type = ie2dnnl(op); + cell_act = ie2dnnl(rnnCellBase->get_activations()[0]); // Works only for RNN with one gate - cell_type = ie2mkl(cellLayer->cellType); - cell_act = ie2mkl(cellLayer->activations[0]); // Works only for RNN with one gate + auto in_data_dims = op->get_input_shape(0); + auto in_h_state_dims = op->get_input_shape(1); + auto out_h_state_dims = op->get_output_shape(0); - if (cellLayer->clip != 0.0f) { - // TODO [oneDNN]: No more supported - THROW_ERROR << "Clipping is not supported for RNN primitive"; -// cell_desc.set_clipping(cellLayer->clip); - } - - auto &ins = cellLayer->insData; - auto &outs = cellLayer->outData; - - if (!one_of(ins.size(), 3, 2)) - THROW_ERROR << "Incorrect number of input ports for layer " << getName(); - if (!one_of(outs.size(), 2, 1)) - THROW_ERROR << "Incorrect number of output ports for layer " << getName(); - - auto in_data_dims = getParentEdgeAt(0)->getDims(); - auto in_h_state_dims = getParentEdgeAt(1)->getDims(); - auto out_h_state_dims = getChildEdgeAt(0)->getDims(); - - if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2) - THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); + if (in_data_dims.size() != 2 || in_h_state_dims.size() != 2) + IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); G = gatesCount(cell_type); S = statesCount(cell_type); @@ -147,59 +262,56 @@ void MKLDNNRNN::fillCellDesc() { // Expected shapes MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; - if (in_data_dims != D_shape - || in_h_state_dims != S_shape - || out_h_state_dims != S_shape) - THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); + if (in_data_dims != D_shape.ToSizeVector() + || in_h_state_dims != S_shape.ToSizeVector() + || 
out_h_state_dims != S_shape.ToSizeVector()) + IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); if (S == 2) { - auto in_c_state_dims = getParentEdgeAt(2)->getDims(); - auto out_c_state_dims = getChildEdgeAt(1)->getDims(); + auto in_c_state_dims = op->get_input_shape(2); + auto out_c_state_dims = op->get_output_shape(1); - if (in_c_state_dims != S_shape - || out_c_state_dims != S_shape) - THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); + if (in_c_state_dims != S_shape.ToSizeVector() + || out_c_state_dims != S_shape.ToSizeVector()) + IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); } +} - auto blobs = cellLayer->blobs; - Blob::Ptr weights, bias; - if (blobs.find("weights") != blobs.end()) weights = blobs["weights"]; - if (blobs.find("biases") != blobs.end()) bias = blobs["biases"]; - - if (!weights) - THROW_ERROR << "RNN Layer. Weights do not present."; - - if (weights->size() != G * SC * (SC + DC)) - THROW_ERROR << "RNN Layer. Weights size is not correct. Expected size:" << G * SC * (SC + DC); - - if (bias && bias->size() != Gb * SC) - THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC; - +void MKLDNNRNN::fillCellDesc() { + runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); + MKLDNNDims S_4D_shape {L, D, N, SC}; + // layer input plus states in_data_d.resize(S + 1); out_data_d.resize(S + 1); // Shapes and Attributes are correct. Can start internal stuff initialization. - in_data_d[RNNInOutKind::Layer] = {{T, N, DC}, dataType, memory::format_tag::tnc}; - out_data_d[RNNInOutKind::Layer] = {{T, N, SC}, dataType, memory::format_tag::tnc}; + in_data_d[RNNInOutKind::Layer] = {MKLDNNDims{T, N, DC}, dataType, memory::format_tag::tnc}; + out_data_d[RNNInOutKind::Layer] = {MKLDNNDims{T, N, SC}, dataType, memory::format_tag::tnc}; in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; if (haveCellState(cell_type)) { - in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; + in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; } w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; - if (bias) - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + // Add 5th input + w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + copyWeightsData(); + + // Expected shapes + MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; std::vector in_candidate, out_candidate; + in_candidate.reserve(6); + in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc}); in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); @@ -208,167 +320,271 @@ void MKLDNNRNN::fillCellDesc() { in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc}); out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, 
memory::format_tag::nc}); } - - Precision weights_prec = as(weights)->getTensorDesc().getPrecision(); - - if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) { - if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32) - convertWeightsBlobToBF16(); + if (one_of(cell_type, mkldnn::algorithm::vanilla_rnn, mkldnn::algorithm::vanilla_gru, mkldnn::algorithm::lbr_gru, mkldnn::algorithm::vanilla_lstm)) { + in_candidate.emplace_back(MKLDNNMemoryDesc {WShape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(MKLDNNMemoryDesc {RShape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(MKLDNNMemoryDesc {BShape, memory::data_type::f32, memory::format_tag::x}); } createDescriptor(in_candidate, out_candidate); } -void MKLDNNRNN::fillSeqDesc() { - if (!descs.empty()) return; - auto rnnLayer = std::dynamic_pointer_cast(getCnnLayer()); +void MKLDNNRNN::initSeq(const std::shared_ptr& op) { + auto rnnCellBase = std::dynamic_pointer_cast(op); + if (!rnnCellBase) + IE_THROW() << "No original layer for RNNCell."; - if (!rnnLayer) - THROW_ERROR << "Wrong RNN layer representation. Cannot cast to RNNSequenceLayer."; + cell_type = ie2dnnl(op); + cell_act = mkldnn::algorithm::undef; + if (!rnnCellBase->get_activations().empty()) + cell_act = ie2dnnl(rnnCellBase->get_activations()[0]); // Works only for RNN with one gate - if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN)) - THROW_ERROR << "RNN layer supports only LSTM/GRU/RNN cell"; + direction = ieDirection2dnnl(op); - cell_type = ie2mkl(rnnLayer->cellType); - cell_act = algorithm::undef; - if (!rnnLayer->activations.empty()) - cell_act = ie2mkl(rnnLayer->activations[0]); // Works only for RNN with one gate + if (!one_of(op->get_input_size(), 6, 7)) + IE_THROW() << "Incorrect number of input ports for layer " << getName(); + if (!one_of(op->get_output_size(), 2, 3)) + IE_THROW() << "Incorrect number of output ports for layer " << getName(); - // TODO [oneDNN]: No more supported - if (rnnLayer->clip != 0.0f) { - THROW_ERROR << "Clipping is not supported for RNN primitive"; -// cell_desc.set_clipping(rnnLayer->clip); + in_data_dims = op->get_input_shape(0); + out_data_dims = op->get_output_shape(0); + + if (in_data_dims.size() != 3 || out_data_dims.size() != 4) + IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + + N = op->get_input_shape(1)[0]; + nativeOrder = false; + const auto rtInfo = op->get_rt_info(); + + if (rtInfo.count("seqAxis")) { + nativeOrder = std::dynamic_pointer_cast>(rtInfo.at("seqAxis"))->get() == 0; } + out_data_dims.erase(out_data_dims.begin() + 1); - if (!one_of(rnnLayer->axis, 0, 1)) - THROW_ERROR << "RNN layer supports only sequence axis 0 or 1"; - nativeOrder = rnnLayer->axis == 0; - - if (!one_of(rnnLayer->direction, _RNN::FWD, _RNN::BWD)) - THROW_ERROR << "RNN layer supports only unidirectional RNN layer"; - direction = ie2mkl(rnnLayer->direction); - - auto &ins = rnnLayer->insData; - auto &outs = rnnLayer->outData; - - if (!one_of(ins.size(), 3, 2, 1)) - THROW_ERROR << "Incorrect number of input ports for layer " << getName(); - if (!one_of(outs.size(), 3, 2, 1)) - THROW_ERROR << "Incorrect number of output ports for layer " << getName(); - - auto in_data_dims = getParentEdgeAt(0)->getDims(); - auto out_data_dims = getChildEdgeAt(0)->getDims(); - - if (in_data_dims.ndims() != 3 || out_data_dims.ndims() != 3) - THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); - - if 
(!nativeOrder) { - std::swap(in_data_dims[0], in_data_dims[1]); - std::swap(out_data_dims[0], out_data_dims[1]); - } + std::swap(in_data_dims[0], in_data_dims[1]); + std::swap(out_data_dims[0], out_data_dims[1]); G = gatesCount(cell_type); S = statesCount(cell_type); T = in_data_dims[0]; - N = in_data_dims[1]; DC = in_data_dims[2]; - SC = out_data_dims[2]; + SC = rnnCellBase->get_hidden_size(); Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; - MKLDNNDims ID_shape {T, N, DC}, OD_shape {T, N, SC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; - - if (out_data_dims != OD_shape) - THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); - - auto& blobs = rnnLayer->blobs; - Blob::Ptr weights, bias; - if (blobs.find("weights") != blobs.end()) weights = blobs["weights"]; - if (blobs.find("biases") != blobs.end()) bias = blobs["biases"]; - - if (!weights) - THROW_ERROR << "RNN Layer. Weights do not present."; - - if (weights->size() != G * SC * (SC + DC)) - THROW_ERROR << "RNN Layer. Weights size is not correct. Expected size:" << G * SC * (SC + DC); - - for (int i = 1; i < ins.size(); i++) { - if (getParentEdgeAt(i)->getDims() != S_shape) - THROW_ERROR << "Incorrect shape of state ports for layer " << getName(); - } - - for (int i = 1; i < outs.size(); i++) { - if (getChildEdgeAt(i)->getDims() != S_shape) - THROW_ERROR << "Incorrect shape of state ports for layer " << getName(); - } - // layer input plus states in_data_d.resize(S + 1); out_data_d.resize(S + 1); +} +void MKLDNNRNN::fillSeqDesc() { + runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - // Try to create descriptor and corresponding configuration - in_data_d[RNNInOutKind::Layer] = {in_data_dims, dataType, memory::format_tag::tnc}; - out_data_d[RNNInOutKind::Layer] = {out_data_dims, dataType, memory::format_tag::tnc}; + MKLDNNDims S_4D_shape {L, D, N, SC}; - in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; + // Try to create descriptor and corresponding configuration + in_data_d[RNNInOutKind::Layer] = {MKLDNNDims{in_data_dims}, dataType, memory::format_tag::tnc}; + out_data_d[RNNInOutKind::Layer] = {MKLDNNDims{out_data_dims}, dataType, memory::format_tag::tnc}; + + in_data_d[RNNInOutKind::HiddenState] = {MKLDNNDims{S_4D_shape}, dataType, memory::format_tag::ldnc}; + out_data_d[RNNInOutKind::HiddenState] = {MKLDNNDims{S_4D_shape}, dataType, memory::format_tag::ldnc}; if (haveCellState(cell_type)) { - in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; + in_data_d[RNNInOutKind::CellState] = {MKLDNNDims{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc}; + out_data_d[RNNInOutKind::CellState] = {MKLDNNDims{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc}; } w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; - if (bias && bias->size() != Gb * SC) - THROW_ERROR << "RNN Layer. Biases size is not correct. 
Expected size:" << G * SC; + w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; - if (bias) - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + copyWeightsData(); - std::vector in_candidate, out_candidate; + std::vector in_candidate; + + if (nativeOrder) + in_candidate.push_back(MKLDNNMemoryDesc{inDims[RNNInOutKind::Layer], dataType, memory::format_tag::tnc}); + else + in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc}); + + in_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, dataType, memory::format_tag::ntc}); // initial hidden state + if (haveCellState(cell_type)) + in_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc}); // initial cell state + in_candidate.push_back(MKLDNNMemoryDesc{{N}, memory::data_type::s32, memory::format_tag::x}); // sequence lengths + in_candidate.push_back(MKLDNNMemoryDesc{{D, G * SC, DC}, memory::data_type::f32, memory::format_tag::ntc}); // W + in_candidate.push_back(MKLDNNMemoryDesc{{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc}); // R + in_candidate.push_back(MKLDNNMemoryDesc{{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc}); // B + + std::vector out_candidate; if (nativeOrder) { - in_candidate.push_back(in_data_d[RNNInOutKind::Layer]); out_candidate.push_back(out_data_d[RNNInOutKind::Layer]); } else { - in_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc}); - out_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, SC}, dataType, memory::format_tag::ntc}); + // TODO reorder ntc -> ndtc does not work, thus use tnc(plain) + transformation reshape-transpose-reshape for now. + out_candidate.push_back(MKLDNNMemoryDesc{{T, N, SC}, dataType, memory::format_tag::tnc}); } - in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc}); - out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc}); - - if (haveCellState(cell_type)) { - in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc}); - out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc}); - } - - Precision weights_prec = as(weights)->getTensorDesc().getPrecision(); - - if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) { - if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32) - convertWeightsBlobToBF16(); - } + out_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, dataType, memory::format_tag::ntc}); + if (haveCellState(cell_type)) + out_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc}); createDescriptor(in_candidate, out_candidate); } -void MKLDNNRNN::convertWeightsBlobToBF16() { - Blob::Ptr &weights = getCnnLayer()->blobs["weights"]; - MemoryBlob::Ptr cur_weights = as(weights); - TensorDesc td(Precision::BF16, cur_weights->getTensorDesc().getDims(), cur_weights->getTensorDesc().getLayout()); - MemoryBlob::Ptr new_weights_blob = make_shared_blob(td); +bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) { + if (!weightsByLayerPrec.count(layerPrec)) + IE_THROW() << "Unsupported layer precision " << layerPrec; + return weightsPrec == weightsByLayerPrec.at(layerPrec); +} - new_weights_blob->allocate(); - bfloat16_t *dst = new_weights_blob->wmap(); +template +void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t rIdx) 
{ + const auto weightPrec = getOriginalInputPrecisionAtPort(wIdx); + if (!verifyWeightsPrecision(runtimePrecision, weightPrec) && runtimePrecision != Precision::BF16 && weightPrec != Precision::FP32) { + IE_THROW() << "Doesn't support combination of weights precision: " << weightPrec << " and runtime precision: " << runtimePrecision; + } + // create weight blobs (data and state part) + auto w_data_mem = std::make_shared(getEngine()); + w_data_mem->Create(w_data_d); + internalBlobMemory.push_back(w_data_mem); + auto w_state_mem = std::make_shared(getEngine()); + w_state_mem->Create(w_state_d); + internalBlobMemory.push_back(w_state_mem); - float* fp32src = cur_weights->rmap().as(); - cpu_convert(fp32src, dst, Precision::FP32, Precision::BF16, new_weights_blob->size()); - weights = new_weights_blob; + const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getDims().size(); + const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getDims().size(); + + auto *wInputNode = dynamic_cast(getParentEdgesAtPort(wIdx)[0]->getParent().get()); + auto wConstBlob = wInputNode->getConstBlob(); + + auto *rInputNode = dynamic_cast(getParentEdgesAtPort(rIdx)[0]->getParent().get()); + auto rConstBlob = rInputNode->getConstBlob(); + + std::vector ie_w_vec(ie_w_vec_size), ie_r_vec(ie_r_vec_size); + + auto ie_w_ptr = ie_w_vec.data(); + auto ie_r_ptr = ie_r_vec.data(); + cpu_convert(wConstBlob->cbuffer().as(), ie_w_ptr, weightPrec, runtimePrecision, ie_w_vec_size); + cpu_convert(rConstBlob->cbuffer().as(), ie_r_ptr, weightPrec, runtimePrecision, ie_r_vec_size); + + auto w_ptr = static_cast(w_data_mem->GetData()); + auto r_ptr = static_cast(w_state_mem->GetData()); + const int step = SC * G; + + for (int g = 0; g < G; g++) { + for (int out_i = 0; out_i < SC; out_i++) { + Prec *l_w_ptr = w_ptr + gate_map[g] * SC + out_i; + for (int in_i = 0; in_i < DC; in_i++) { + *l_w_ptr = *ie_w_ptr; + ie_w_ptr++; + l_w_ptr += step; + } + + Prec *l_r_ptr = r_ptr + gate_map[g] * SC + out_i; + for (int in_i = 0; in_i < SC; in_i++) { + *l_r_ptr = *ie_r_ptr; + ie_r_ptr++; + l_r_ptr += step; + } + } + } +} + +template +void MKLDNNRNN::fillBiases(const int *gate_map) { + using dataType = typename PrecisionTrait::value_type; + + if (!w_bias_d) + return; + + if (getOriginalInputPrecisionAtPort(bIdx) != Precision::FP32) { + IE_THROW() << "Doesn't support bias precision: " << getOriginalInputPrecisionAtPort(bIdx); + } + + auto w_bias_mem = std::make_shared(getEngine()); + w_bias_mem->Create(w_bias_d); + internalBlobMemory.push_back(w_bias_mem); + + auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); + auto constBlob = constInputNode->getConstBlob(); + auto srtPtr = constBlob->cbuffer().as(); + + std::vector ie_b_vec(constBlob->size()); + cpu_convert(srtPtr, &ie_b_vec[0], constBlob->getTensorDesc().getPrecision(), Prec, constBlob->size()); + + auto b_ptr = static_cast(w_bias_mem->GetData()); + for (int g = 0; g < Gb; g++) { + dataType *l_b_ptr = b_ptr + gate_map[g] * SC; + const dataType *l_ie_b_ptr = &ie_b_vec[g * SC]; + cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(typename PrecisionTrait::value_type)); + } +} + +void MKLDNNRNN::copyWeightsData() { + /* Copy Weight data + * IE format: + * W - [gates, out_state_size, in_data_size] + * R - [gates, out_state_size, in_state_size] + * B - [gates, out_state_size] + * + * DNNL format: + * W - [1, 1, in_date_size, gates, out_state_size] + * R - [1, 1, in_state_size, gates, out_state_size] + * B - [gates, out_state_size] + * + * Gate order + * ====== LSTM 
====== + * Caffe - IFOC, ONNX - IOFC + * IE - FICO, mkldnn - IFCO + * + * ====== GRU ====== + * IE - URO, mkldnn - URO + */ + const int gate_map_lstm[] = {1, 0, 2, 3}; // FICO -> IFCO + const int gate_map_gru[] = {0, 1, 2, 3}; + const int gate_map_rnn[] = {0}; + const int *gate_map; + const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int); + const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int); + const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int); + if (cell_type == mkldnn::algorithm::vanilla_lstm) { + gate_map = gate_map_lstm; + if (G > gate_map_lstm_size) { + IE_THROW() << "G isn't equal to the size of gate_map"; + } + } else if (cell_type == mkldnn::algorithm::vanilla_gru) { + gate_map = gate_map_gru; + if (G > gate_map_gru_size) { + IE_THROW() << "G isn't equal to the size of gate_map"; + } + } else if (cell_type == mkldnn::algorithm::lbr_gru) { + gate_map = gate_map_gru; + if (G > gate_map_gru_size) { + IE_THROW() << "G isn't equal to the size of gate_map"; + } + } else if (cell_type == mkldnn::algorithm::vanilla_rnn) { + gate_map = gate_map_rnn; + if (G > gate_map_rnn_size) { + IE_THROW() << "G isn't equal to the size of gate_map"; + } + } else { + gate_map = gate_map_gru; + if (G > gate_map_gru_size) { + IE_THROW() << "G isn't equal to the size of gate_map"; + } + } + + if (runtimePrecision == Precision::BF16) + fillWeights(gate_map, wIdx, rIdx); + else if (runtimePrecision == Precision::FP32) + fillWeights(gate_map, wIdx, rIdx); + else // TODO FP16 and INT8 support + IE_THROW() << "Unsupported data type"; + + if (runtimePrecision == Precision::BF16 || runtimePrecision == Precision::FP32) + fillBiases(gate_map); } void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, @@ -425,7 +641,7 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, descs.push_back(desc); } break; default: - THROW_ERROR << "Unknown cell type"; + IE_THROW() << "Unknown cell type"; } // Fill supported config @@ -450,170 +666,14 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, supportedPrimitiveDescriptors.emplace_back(config, ref_any); } -bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) { - if (!weightsByLayerPrec.count(layerPrec)) - THROW_ERROR << "Unsupported layer precision " << layerPrec; - return weightsPrec == weightsByLayerPrec.at(layerPrec); -} - -void MKLDNNRNN::verifyWeights() { - auto layer = getCnnLayer(); - auto weightsIt = layer->blobs.find("weights"); - - if (weightsIt == layer->blobs.end()) - THROW_ERROR << "Missed weights blob."; - - const auto& weightsPrec = weightsIt->second->getTensorDesc().getPrecision(); - - if (!verifyWeightsPrecision(runtimePrecision, weightsPrec)) { - THROW_ERROR << "Weights precision " << weightsPrec << - " does not match runtime precision" << runtimePrecision; - } -} - -void MKLDNNRNN::verifyBiases() { - auto layer = getCnnLayer(); - if (layer->blobs.find("biases") != layer->blobs.end() - && layer->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32) - THROW_ERROR << "Invalid biases precision: " << layer->blobs["biases"]->getTensorDesc().getPrecision(); -} - void MKLDNNRNN::createPrimitive() { - if (prim) return; - - verifyWeights(); - verifyBiases(); - - /* - * Gate order - * ====== LSTM ====== - * Caffe - IFOC, ONNX - IOFC - * IE - FICO, mkldnn - IFCO - * - * ====== GRU ====== - * IE - URO, mkldnn - URO - */ - const int gate_map_lstm[] = {1, 0, 2, 3}; // FICO -> IFCO - const int gate_map_gru[] = {0, 1, 2, 3}; - const int 
gate_map_rnn[] = {0}; - const int *gate_map; - const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int); - const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int); - const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int); - if (cell_type == algorithm::vanilla_lstm) { - gate_map = gate_map_lstm; - if (G > gate_map_lstm_size) { - THROW_ERROR << "G isn't equal to the size of gate_map"; - } - } else if (cell_type == algorithm::vanilla_gru) { - gate_map = gate_map_gru; - if (G > gate_map_gru_size) { - THROW_ERROR << "G isn't equal to the size of gate_map"; - } - } else if (cell_type == algorithm::lbr_gru) { - gate_map = gate_map_gru; - if (G > gate_map_gru_size) { - THROW_ERROR << "G isn't equal to the size of gate_map"; - } - } else if (cell_type == algorithm::vanilla_rnn) { - gate_map = gate_map_rnn; - if (G > gate_map_rnn_size) { - THROW_ERROR << "G isn't equal to the size of gate_map"; - } - } else { - gate_map = gate_map_gru; - if (G > gate_map_gru_size) { - THROW_ERROR << "G isn't equal to the size of gate_map"; - } - } - - if (runtimePrecision == Precision::BF16) - fillWeights(gate_map); - else if (runtimePrecision == Precision::FP32) - fillWeights(gate_map); - else // TODO FP16 and INT8 support - THROW_ERROR << "Unsupported data type"; - - if (runtimePrecision == Precision::BF16 || - runtimePrecision == Precision::FP32) - fillBiases(gate_map); - auto pd = descs[0].createPrimitiveDescriptorIterator(getEngine()); prim.reset(new mkldnn::primitive(pd)); } -/* - * IE format: - * B - [gates, out_state_size] - * - * MKLDNN format: - * B - [gates, out_state_size] - * - */ -template -void MKLDNNRNN::fillBiases(const int *gate_map) { - if (!w_bias_d) - return; - - auto w_bias_mem = std::make_shared(getEngine()); - w_bias_mem->Create(w_bias_d); - internalBlobMemory.push_back(w_bias_mem); - - auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as(); - auto b_ptr = static_cast(w_bias_mem->GetData()); - for (int g = 0; g < Gb; g++) { - Prec *l_b_ptr = b_ptr + gate_map[g]*SC; - const Prec *l_ie_b_ptr = ie_b_ptr + g * SC; - cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(Prec)); - } -} - -/* - * IE format: - * W - [gates, out_state_size, in_data_size + in_state_size] - * - * MKLDNN format: - * W - [1, 1, in_date_size, gates, out_state_size] - * R - [1, 1, in_state_size, gates, out_state_size] - * - */ -template -void MKLDNNRNN::fillWeights(const int *gate_map) { - // create weight blobs (data and state part) - auto w_data_mem = std::make_shared(getEngine()); - w_data_mem->Create(w_data_d); - internalBlobMemory.push_back(w_data_mem); - auto w_state_mem = std::make_shared(getEngine()); - w_state_mem->Create(w_state_d); - internalBlobMemory.push_back(w_state_mem); - - auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as(); - auto w_ptr = static_cast(w_data_mem->GetData()); - auto r_ptr = static_cast(w_state_mem->GetData()); - const int step = SC * G; - - for (int g = 0; g < G; g++) { - for (int out_i = 0; out_i < SC; out_i++) { - Prec *l_w_ptr = w_ptr + gate_map[g]*SC + out_i; - Prec *l_r_ptr = r_ptr + gate_map[g]*SC+ out_i; - for (int in_i = 0; in_i < DC; in_i++) { - *l_w_ptr = *ie_w_ptr; - ie_w_ptr++; - l_w_ptr += step; - } - - for (int in_i = 0; in_i < SC; in_i++) { - *l_r_ptr = *ie_w_ptr; - ie_w_ptr++; - l_r_ptr += step; - } - } - } -} - void MKLDNNRNN::execute(mkldnn::stream strm) { if (!prim) - THROW_ERROR << "No initialized primitive to execute"; + IE_THROW() << "No initialized primitive to execute"; const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr(); 
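    // Fetch input/output memory from the graph edges; these buffers are bound to the RNN primitive's arguments when it is executed.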
const auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index 2cf51f09913..2551dd86976 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -14,9 +13,10 @@ namespace MKLDNNPlugin { class MKLDNNRNN : public MKLDNNNode { public: - MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNRNN(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNRNN() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; @@ -26,19 +26,20 @@ public: void execute(mkldnn::stream strm) override; private: + void initCell(const std::shared_ptr& op); + void initSeq(const std::shared_ptr& op); void fillCellDesc(); void fillSeqDesc(); bool verifyWeightsPrecision(const InferenceEngine::Precision& layerPrec, const InferenceEngine::Precision& weightsPrec); - void verifyWeights(); - void verifyBiases(); - void convertWeightsBlobToBF16(); template - void fillWeights(const int* gate_map); - template + void fillWeights(const int* gate_map, const size_t wIdx, const size_t rIdx); + template void fillBiases(const int* gate_map); + void copyWeightsData(); + private: InferenceEngine::Precision runtimePrecision; /** Specify mode Cell or Seq. true - Cell, false - Seq */ @@ -80,10 +81,14 @@ private: MKLDNNMemoryDesc w_state_d; MKLDNNMemoryDesc w_bias_d; - // List of in/out reorders if required - std::vector exec_before; - std::vector exec_after; + std::vector in_data_dims; + std::vector out_data_dims; + + size_t wIdx = 0; + size_t rIdx = 0; + size_t bIdx = 0; static const std::map weightsByLayerPrec; -}; // class MKLDNNRNN +}; + } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp index 9ad06a5427a..1aa7752f456 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp @@ -3,7 +3,6 @@ // #include "mkldnn_roi_align_node.h" -#include #include #include #include @@ -14,6 +13,7 @@ #include #include "ie_parallel.hpp" #include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -21,20 +21,53 @@ using namespace mkldnn; using namespace mkldnn::impl::cpu; using namespace mkldnn::impl::cpu::x64; -MKLDNNROIAlignNode::MKLDNNROIAlignNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +using ngPoolingMode = ngraph::op::v3::ROIAlign::PoolingMode; + +bool MKLDNNROIAlignNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto roiAlign = std::dynamic_pointer_cast(op); + if (!roiAlign) { + errorMessage = "Only opset3 ROIAlign operation is supported"; + return false; + } + + const ngPoolingMode mode = roiAlign->get_mode(); + if (mode != ngPoolingMode::AVG && mode != ngPoolingMode::MAX) { + errorMessage = "Doesn't support mode: " + ngraph::as_string(mode); + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNROIAlignNode::MKLDNNROIAlignNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "ROIPooling layer with name '" + getName() + "' "; + + const auto roiAlign = std::dynamic_pointer_cast(op); + pooledH = roiAlign->get_pooled_h(); + pooledW = roiAlign->get_pooled_w(); + spatialScale = roiAlign->get_spatial_scale(); + samplingRatio = roiAlign->get_sampling_ratio(); + const ngPoolingMode m = roiAlign->get_mode(); + if (m == ngPoolingMode::MAX) { + algorithm = Algorithm::ROIAlignMax; + } else if (m == ngPoolingMode::AVG) { + algorithm = Algorithm::ROIAlignAvg; + } + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNROIAlignNode::getSupportedDescriptors() { if (!descs.empty()) return; - class CNNLayer *genericLayer = getCnnLayer().get(); - if (genericLayer == nullptr) - IE_THROW() << "Cannot convert ROIPooling layer."; - - std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' "; - if (getParentEdges().size() != 3) IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size(); if (getChildEdges().empty()) @@ -66,27 +99,14 @@ void MKLDNNROIAlignNode::getSupportedDescriptors() { << getParentEdgeAt(1)->getDims()[0] << ") and indexes (" << getParentEdgeAt(2)->getDims()[0] << ")"; } - - pooledH = genericLayer->GetParamAsInt("pooled_h"); - pooledW = genericLayer->GetParamAsInt("pooled_w"); - spatialScale = genericLayer->GetParamAsFloat("spatial_scale"); - samplingRatio = genericLayer->GetParamAsInt("sampling_ratio"); - std::string m = genericLayer->GetParamAsString("mode"); - if (m == "max") { - opType = ROIAlignOpType::Max; - } else if (m == "avg") { - opType = ROIAlignOpType::Avg; - } else { - IE_THROW() << errorPrefix << "doesn't support roi pooling method: " << m; - } } void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - Precision inputPrec0 = getCnnLayer()->insData[0].lock()->getPrecision(); - Precision outputPrec = getCnnLayer()->outData[0]->getPrecision(); + Precision inputPrec0 = getOriginalInputPrecisionAtPort(0); + Precision outputPrec = getOriginalOutputPrecisionAtPort(0); if (!mayiuse(avx512_core)) { if (outputPrec == Precision::BF16 || inputPrec0 == Precision::BF16) @@ -291,8 +311,8 @@ void MKLDNNROIAlignNode::executeSpecified() { pointVector[sampleIndex + 3].second * wInputStride + blockResidual_; float part4 = srcData[part4Index]; - switch (opType) { - case ROIAlignOpType::Max: + switch (getAlgorithm()) { + case Algorithm::ROIAlignMax: { float sampleValue = std::max( {weightVector[sampleIndex] * part1, @@ -302,7 +322,7 @@ void MKLDNNROIAlignNode::executeSpecified() { pooledValue = sampleValue > pooledValue ? 
sampleValue : pooledValue; break; } - case ROIAlignOpType::Avg: + case Algorithm::ROIAlignAvg: default: { float sampleValue = diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h index 5e1901644bc..24831cce5b4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h @@ -13,14 +13,9 @@ namespace MKLDNNPlugin { -enum ROIAlignOpType { - Max, - Avg -}; - class MKLDNNROIAlignNode : public MKLDNNNode { public: - MKLDNNROIAlignNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNROIAlignNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNROIAlignNode() override = default; void getSupportedDescriptors() override; @@ -29,16 +24,19 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: int pooledH = 7; int pooledW = 7; int samplingRatio = 2; float spatialScale = 1.0f; - ROIAlignOpType opType = Max; template void executeSpecified(); template struct ROIAlignExecute; -}; -} // namespace MKLDNNPlugin + std::string errorPrefix; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index c0033280eeb..169586819b7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -4,7 +4,6 @@ #include "mkldnn_roi_pooling_node.h" -#include #include #include #include @@ -12,6 +11,7 @@ #include #include #include "ie_parallel.hpp" +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -46,7 +46,7 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi mov(reg_bin_area, ptr[this->param1 + GET_OFF(bin_area)]); mov(reg_c_blocks, ptr[this->param1 + GET_OFF(c_blocks)]); - if (jpp_.alg == ROIPoolingOpType::Max) { + if (jpp_.alg == Algorithm::ROIPoolingMax) { mov(reg_kh, ptr[this->param1 + GET_OFF(kh)]); mov(reg_kw, ptr[this->param1 + GET_OFF(kw)]); } else { @@ -220,7 +220,7 @@ private: cmp(reg_bin_area, 0); je(empty_roi_label, T_NEAR); - if (jpp_.alg == ROIPoolingOpType::Max) + if (jpp_.alg == Algorithm::ROIPoolingMax) roi_pool_max(c_blocks); else roi_pool_bilinear(c_blocks); @@ -229,7 +229,7 @@ private: add(reg_input, 4 * sizeof(float)); add(reg_output, 4 * sizeof(float)); - if (jpp_.alg == ROIPoolingOpType::Max) + if (jpp_.alg == Algorithm::ROIPoolingMax) roi_pool_max(c_blocks); else roi_pool_bilinear(c_blocks); @@ -247,20 +247,49 @@ private: } }; -MKLDNNROIPoolingNode::MKLDNNROIPoolingNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto roiPooling = std::dynamic_pointer_cast(op); + if (!roiPooling) { + errorMessage = "Only opset2 ROIPooling operation is supported"; + return false; + } + const std::string mode = roiPooling->get_method(); + if (mode != "max" && mode != "bilinear") { + errorMessage = "Doesn't support method: " + mode; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNROIPoolingNode::MKLDNNROIPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' "; + + const auto roiPooling = std::dynamic_pointer_cast(op); + pooled_h = roiPooling->get_output_size()[0]; + pooled_w = roiPooling->get_output_size()[1]; + spatial_scale = roiPooling->get_spatial_scale(); + std::string m = roiPooling->get_method(); + if (m == "max") { + algorithm = Algorithm::ROIPoolingMax; + } else if (m == "bilinear") { + algorithm = Algorithm::ROIPoolingBilinear; + } + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNROIPoolingNode::getSupportedDescriptors() { if (!descs.empty()) return; - GenericLayer* genericLayer = getCnnLayer().get(); - if (genericLayer == nullptr) - IE_THROW() << "Cannot convert ROIPooling layer."; - - std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' "; - if (getParentEdges().size() != 2) IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size(); if (getChildEdges().empty()) @@ -282,18 +311,6 @@ void MKLDNNROIPoolingNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" << getParentEdgeAt(1)->getDims()[0] << "," << getParentEdgeAt(1)->getDims()[1] << "]"; } - - pooled_h = genericLayer->GetParamAsInt("pooled_h"); - pooled_w = genericLayer->GetParamAsInt("pooled_w"); - spatial_scale = genericLayer->GetParamAsFloat("spatial_scale"); - std::string m = genericLayer->GetParamAsString("method", "max"); - if (m == "max") { - opType = ROIPoolingOpType::Max; - } else if (m == "bilinear") { - opType = ROIPoolingOpType::Bilinear; - } else { - IE_THROW() << errorPrefix << "doesn't support roi pooling method: " << m; - } } void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { @@ -358,7 +375,7 @@ void MKLDNNROIPoolingNode::createPrimitive() { jpp.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 
15 : 7; - jpp.alg = opType; + jpp.alg = getAlgorithm(); if (mayiuse(cpu::x64::avx512_common)) { roi_pooling_kernel.reset(new jit_uni_roi_pooling_kernel_f32(jpp)); @@ -430,7 +447,7 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { int roi_batch_ind = static_cast(src_roi_ptr[0]); - if (jpp.alg == ROIPoolingOpType::Max) { + if (jpp.alg == Algorithm::ROIPoolingMax) { int roi_start_w = static_cast(round(src_roi_ptr[1] * jpp.spatial_scale)); int roi_start_h = static_cast(round(src_roi_ptr[2] * jpp.spatial_scale)); int roi_end_w = static_cast(round(src_roi_ptr[3] * jpp.spatial_scale)); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h index f3b19aa2328..c9e2be59924 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h @@ -12,11 +12,6 @@ namespace MKLDNNPlugin { -enum ROIPoolingOpType { - Max, - Bilinear -}; - struct jit_roi_pooling_params { int mb, c; int ih, iw, oh, ow; @@ -27,7 +22,7 @@ struct jit_roi_pooling_params { int pooled_h; int pooled_w; - ROIPoolingOpType alg; + Algorithm alg; }; struct jit_roi_pooling_call_args { @@ -65,7 +60,7 @@ struct jit_uni_roi_pooling_kernel { class MKLDNNROIPoolingNode : public MKLDNNNode { public: - MKLDNNROIPoolingNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNROIPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNROIPoolingNode() override = default; void getSupportedDescriptors() override; @@ -74,16 +69,18 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: int pooled_h = 0; int pooled_w = 0; float spatial_scale = 0; - ROIPoolingOpType opType = Max; jit_roi_pooling_params jpp = {}; std::shared_ptr roi_pooling_kernel = nullptr; + + std::string errorPrefix; }; } // namespace MKLDNNPlugin - diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp index aa1d6623463..136ccba9c64 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp @@ -13,85 +13,82 @@ #include "mkldnn/ie_mkldnn.h" #include "utils/general_utils.h" #include "common/cpu_memcpy.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNRollNode::MKLDNNRollNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) { - layerErrorPrefix = "Roll layer with name '" + layer->name + "'"; - if (layer->insData.size() != numberOfInputs) { - IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!"; +bool MKLDNNRollNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto interp = std::dynamic_pointer_cast(op); + if (!interp) { + errorMessage = "Only opset7 Roll operation is supported"; + return false; + } + } catch (...) 
{ + return false; } + return true; +} - /* Data */ - auto data = layer->insData[DATA_INDEX].lock(); - if (data == nullptr) { - IE_THROW() << layerErrorPrefix << " has nullable data"; - } +MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + layerErrorPrefix = "Roll layer with name '" + getName() + "'"; + if (getOriginalInputsNumber() != numberOfInputs) { + IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!"; + } - const auto &dataTensor = data->getTensorDesc(); - shape = dataTensor.getDims(); - const auto &dataPrecision = dataTensor.getPrecision(); + shape = inDims[DATA_INDEX].ToSizeVector(); + const auto &dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); - if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == supportedPrecisionSizes.end()) - IE_THROW() << layerErrorPrefix << "has unsupported precision: " << dataPrecision.name(); + if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == supportedPrecisionSizes.end()) + IE_THROW() << layerErrorPrefix << "has unsupported precision: " << dataPrecision.name(); - if (shape.size() < 1) { - IE_THROW() << layerErrorPrefix << " doesn't support 'data' input tensor with rank: " << shape.size(); - } - numOfDims = shape.size(); + if (shape.size() < 1) { + IE_THROW() << layerErrorPrefix << " doesn't support 'data' input tensor with rank: " << shape.size(); + } + numOfDims = shape.size(); - if (shape != layer->outData[0]->getTensorDesc().getDims()) { - IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions"; - } + if (shape != outDims[0].ToSizeVector()) { + IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions"; + } - /* Axes */ - auto axesData = layer->insData[AXES_INDEX].lock(); - if (axesData == nullptr) { - IE_THROW() << layerErrorPrefix << " has nullable 'axes' data"; - } - const auto& axesTensor = axesData->getTensorDesc(); - const auto& axesTensorPrec = axesData->getTensorDesc().getPrecision(); - if (axesTensorPrec != Precision::I32 && axesTensorPrec != Precision::I64) { - IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name(); - } + /* Axes */ + const auto& axesTensorPrec = getOriginalInputPrecisionAtPort(AXES_INDEX); + if (axesTensorPrec != Precision::I32 && axesTensorPrec != Precision::I64) { + IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name(); + } - const auto axesTensorRank = axesTensor.getDims().size(); - if (axesTensorRank > 1) { - IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank; - } + const auto axesTensorRank = inDims[AXES_INDEX].ndims(); + if (axesTensorRank > 1) { + IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank; + } - /* Shift */ - auto shiftData = layer->insData[SHIFT_INDEX].lock(); - if (shiftData == nullptr) { - IE_THROW() << layerErrorPrefix << " has nullable 'shift' data"; - } - const auto& shiftTensor = shiftData->getTensorDesc(); - const auto& shiftTensorPrec = shiftData->getTensorDesc().getPrecision(); - if (shiftTensorPrec != Precision::I32 && shiftTensorPrec != Precision::I64) { - IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input 
precision: " << shiftTensorPrec.name(); - } + /* Shift */ + const auto& shiftTensorPrec = getOriginalInputPrecisionAtPort(SHIFT_INDEX); + if (shiftTensorPrec != Precision::I32 && shiftTensorPrec != Precision::I64) { + IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name(); + } - const auto shiftTensorRank = shiftTensor.getDims().size(); - if (shiftTensorRank > 1) { - IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank; + const auto shiftTensorRank = inDims[SHIFT_INDEX].ndims(); + if (shiftTensorRank > 1) { + IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank; + } + } else { + IE_THROW(NotImplemented) << errorMessage; } } + void MKLDNNRollNode::getSupportedDescriptors() {} void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto inputData = getCnnLayer()->insData[0].lock(); - - if (inputData == nullptr) { - IE_THROW() << layerErrorPrefix << " has nullable 'data'"; - } - - InferenceEngine::Precision precision = inputData->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h index 019d65f6332..102b55c5e63 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h @@ -12,7 +12,7 @@ namespace MKLDNNPlugin { class MKLDNNRollNode : public MKLDNNNode { public: - MKLDNNRollNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNRollNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNRollNode() override = default; void getSupportedDescriptors() override; @@ -21,6 +21,8 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: size_t calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp index 6c20d582ffa..5b9692fc562 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp @@ -3,41 +3,59 @@ // #include "mkldnn_scatter_update_node.h" -#include #include #include #include #include #include -#include #include "ie_parallel.hpp" #include #include "common/cpu_memcpy.h" +#include +#include + using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNScatterUpdateNode::MKLDNNScatterUpdateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache), dataSize(0lu), indicesSize(0lu), axisSize(0lu), - dataPrec(Precision::UNSPECIFIED), indicesPrec(Precision::UNSPECIFIED), axisPrec(Precision::UNSPECIFIED) {} +bool MKLDNNScatterUpdateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto scatterElemUpd = std::dynamic_pointer_cast(op); + const auto scatterUpd = 
std::dynamic_pointer_cast(op); + const auto scatterNdUpd = std::dynamic_pointer_cast(op); + if (scatterElemUpd == nullptr && scatterUpd == nullptr && scatterNdUpd == nullptr) { + const std::string opType = op->get_type_name(); + errorMessage = "Only opset" + opType == "ScatterNDUpdate" ? "4 " : "3 " + opType + " operation is supported"; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNScatterUpdateNode::MKLDNNScatterUpdateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache), dataSize(0lu), indicesSize(0lu), axisSize(0lu), dataPrec(Precision::UNSPECIFIED), indicesPrec(Precision::UNSPECIFIED), + axisPrec(Precision::UNSPECIFIED) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = std::string(op->get_type_name()) + " node with name '" + getName() + "'"; + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNScatterUpdateNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - if ((getParentEdges().size() != 3) && (getParentEdges().size() != 4)) - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' has incorrect number of input edges"; + IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' has incorrect number of output edges"; + IE_THROW() << errorPrefix << " has incorrect number of output edges"; if (getParentEdgeAt(DATA_ID)->getDims().ndims() < 1 || getParentEdgeAt(INDICES_ID)->getDims().ndims() < 1 || getParentEdgeAt(UPDATE_ID)->getDims().ndims() < 1) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' do not support scalar input"; + IE_THROW() << errorPrefix << " do not support scalar input"; } Type scatterUpdateType = getType(); @@ -51,8 +69,7 @@ void MKLDNNScatterUpdateNode::getSupportedDescriptors() { scatterUpdateMode = ScatterUpdateMode::ScatterNDUpdate; axisRelaxed = false; } else { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' is not supported"; + IE_THROW() << errorPrefix << " is not supported"; } } @@ -72,14 +89,12 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { // common check if (srcRank != dstRank) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' should have same rank for input and outpt tensor"; + IE_THROW() << errorPrefix << " should have same rank for input and output tensor"; } else { for (size_t r = 0; r < srcRank; r++) { if (srcDataDim[r] != dstDataDim[r]) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' should have same shape for input and outpt tensor." << " The input shape is " - << srcDataDim[r] << ", while output shape is " << dstDataDim[r] << "for" << r << "th dimension"; + IE_THROW() << errorPrefix << " should have same shape for input and output tensor. 
The input shape is " + << srcDataDim[r] << ", while output shape is " << dstDataDim[r] << " for " << r << "th dimension"; } } } @@ -87,16 +102,15 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { switch (scatterUpdateMode) { case ScatterUpdateMode::ScatterUpdate: { if (updateRank != (srcRank + indicesRank - 1)) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' do not have matched tensor rank relationship for input, indices and update"; + IE_THROW() << errorPrefix << " do not have matched tensor rank relationship for input, indices and update"; } break; } case ScatterUpdateMode::ScatterNDUpdate: { size_t k = indicesDim[indicesRank - 1]; if (k > srcRank) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' do not have an correct indices' last dimension value, which should be smaller than or equal to input tensor rank"; + IE_THROW() << errorPrefix << "' do not have an correct indices' last dimension value, " + << "which should be smaller than or equal to input tensor rank"; } SizeVector expectUpdateShape = {}; @@ -108,37 +122,32 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { expectUpdateShape.push_back(srcDataDim[rd]); } if (expectUpdateShape.size() != updateRank) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' do not have matched tensor rank relationship for input, indices and update"; + IE_THROW() << errorPrefix << " do not have matched tensor rank relationship for input, indices and update"; } for (size_t ru = 0; ru < updateRank; ru++) { if (updateDim[ru] != expectUpdateShape[ru]) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' do not have matched tensor shape relationship for input, indices and update"; + IE_THROW() << errorPrefix << " do not have matched tensor shape relationship for input, indices and update"; } } break; } case ScatterUpdateMode::ScatterElementsUpdate: { if (srcRank != indicesRank || srcRank != updateRank) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' do not have the same tensor rank for input, indices and update"; + IE_THROW() << errorPrefix << " do not have the same tensor rank for input, indices and update"; } for (size_t ri = 0; ri < indicesRank; ri++) { if (indicesDim[ri] != updateDim[ri]) { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' do not have the same tensor shape for indices and update"; + IE_THROW() << errorPrefix << " do not have the same tensor shape for indices and update"; } } break; } default: { - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' is not supported"; + IE_THROW() << errorPrefix << " is not supported"; } } - indicesPrec = getCnnLayer()->insData[INDICES_ID].lock()->getPrecision(); + indicesPrec = getOriginalInputPrecisionAtPort(INDICES_ID); auto indicesType = MKLDNNExtensionUtils::IEPrecisionToDataType(indicesPrec); indicesSize = MKLDNNExtensionUtils::sizeOfDataType(indicesType); if (indicesSize >= 8) { @@ -151,7 +160,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { indicesType = MKLDNNExtensionUtils::IEPrecisionToDataType(indicesPrec); if (axisRelaxed) { - axisPrec = getCnnLayer()->insData[AXIS_ID].lock()->getPrecision(); + axisPrec = getOriginalInputPrecisionAtPort(AXIS_ID); auto axisType = MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec); axisSize = MKLDNNExtensionUtils::sizeOfDataType(axisType); if 
(axisSize >= 8) { @@ -163,7 +172,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { } } - dataPrec = getCnnLayer()->insData[DATA_ID].lock()->getPrecision(); + dataPrec = getOriginalInputPrecisionAtPort(DATA_ID); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(dataPrec); dataSize = MKLDNNExtensionUtils::sizeOfDataType(dataType); @@ -215,20 +224,15 @@ void MKLDNNScatterUpdateNode::createPrimitive() { auto &updateMemPtr = getParentEdgeAt(UPDATE_ID)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' did not allocate destination memory"; + IE_THROW() << errorPrefix << " did not allocate destination memory"; if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' did not allocate input memory"; + IE_THROW() << errorPrefix << " did not allocate input memory"; if (!indicesMemPtr || !indicesMemPtr->GetPrimitivePtr()) - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' did not allocate indices memory"; + IE_THROW() << errorPrefix << " did not allocate indices memory"; if (!updateMemPtr || !updateMemPtr->GetPrimitivePtr()) - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' did not allocate update memory"; + IE_THROW() << errorPrefix << " did not allocate update memory"; if (getSelectedPrimitiveDescriptor() == nullptr) - IE_THROW() << "'" << getType() << "'" << " layer with name '" << getName() - << "' did not set preferable primitive descriptor"; + IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; } int64_t MKLDNNScatterUpdateNode::getIndicesValue(uint8_t *indices, size_t offset) { @@ -272,7 +276,6 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); size_t srcRank = srcDataDim.size(); int axis = 0; - std::string errorPrefix = std::string("'") + getTypeStr() + "'" + " layer with name '" + getName() + "'"; if (axisRelaxed) { auto &axisMemPtr = getParentEdgeAt(AXIS_ID)->getMemoryPtr(); uint8_t *axisPtr = reinterpret_cast(axisMemPtr->GetData()) + diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h index 720e3234fad..c3656209b96 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h @@ -20,7 +20,7 @@ enum class ScatterUpdateMode { class MKLDNNScatterUpdateNode : public MKLDNNNode { public: - MKLDNNScatterUpdateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNScatterUpdateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNScatterUpdateNode() override = default; void getSupportedDescriptors() override; @@ -32,6 +32,8 @@ public: return false; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: void scatterUpdate(uint8_t *indicesPtr, uint8_t *updatePtr, int axis, uint8_t *dstDataPtr); void scatterNDUpdate(uint8_t *indicesPtr, uint8_t *updatePtr, uint8_t *dstDataPtr); @@ -48,6 +50,8 @@ private: bool axisRelaxed = false; size_t dataSize, indicesSize, axisSize; InferenceEngine::Precision dataPrec, indicesPrec, axisPrec; + + std::string errorPrefix; 
}; } // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp new file mode 100644 index 00000000000..c67a4394ed8 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp @@ -0,0 +1,222 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include "ie_parallel.hpp" +#include "mkldnn_select_node.h" +#include +#include +#include +#include "common/cpu_memcpy.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNSelectNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto select = std::dynamic_pointer_cast(op); + if (!select) { + errorMessage = "Only opset1 Select operation is supported"; + return false; + } + const auto broadcast = select->get_auto_broadcast(); + if (!MKLDNNPlugin::one_of(broadcast, ngraph::op::AutoBroadcastSpec::NONE, ngraph::op::AutoBroadcastSpec::NUMPY)) { + errorMessage = "Does not support broadcast type: " + ngraph::as_string(broadcast.m_type); + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNSelectNode::MKLDNNSelectNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "Select layer with name '" + op->get_friendly_name() + "'"; + const auto select = std::dynamic_pointer_cast(op); + + if (op->get_input_size() != numOfInputs || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + const auto broadcast = select->get_auto_broadcast(); + if (broadcast == ngraph::op::AutoBroadcastSpec::NONE) { + broadcastType = SelectBroadcastType::NONE; + } else if (broadcast == ngraph::op::AutoBroadcastSpec::NUMPY) { + broadcastType = SelectBroadcastType::NUMPY; + } else { + IE_THROW() << errorPrefix << " has unsupported broadcast type: " + ngraph::as_string(broadcast.m_type); + } + + auto conditionShapes = op->get_input_shape(CONDITION); + if (ngraph::is_scalar(conditionShapes)) + conditionShapes = ngraph::Shape{1}; + auto thenShapes = op->get_input_shape(THEN); + if (ngraph::is_scalar(thenShapes)) + thenShapes = ngraph::Shape{1}; + auto elseShapes = op->get_input_shape(ELSE); + if (ngraph::is_scalar(elseShapes)) + elseShapes = ngraph::Shape{1}; + auto outputShapes = op->get_output_shape(0); + if (ngraph::is_scalar(outputShapes)) + outputShapes = ngraph::Shape{1}; + + if (broadcastType == SelectBroadcastType::NONE && ((conditionShapes != outputShapes) || (thenShapes != outputShapes) || + (elseShapes != outputShapes))) + IE_THROW() << errorPrefix << " and auto_broadcast='none' has input shapes mismatch"; + + if (broadcastType == SelectBroadcastType::NUMPY) { + if (outputShapes.size() < conditionShapes.size() || outputShapes.size() < thenShapes.size() || outputShapes.size() < elseShapes.size()) + IE_THROW() << errorPrefix << " and auto_broadcast='numpy' has incompatible input and output shapes"; + + for (int condIt = conditionShapes.size() - 1, outIt = outputShapes.size() - 1; condIt >= 0; condIt--, outIt--) + if (conditionShapes[condIt] != outputShapes[outIt] && conditionShapes[condIt] != 1) + IE_THROW() << errorPrefix << " and auto_broadcast='numpy' has incompatible 
'Condition' input and output shapes"; + + for (int thenIt = thenShapes.size() - 1, outIt = outputShapes.size() - 1; thenIt >= 0; thenIt--, outIt--) + if (thenShapes[thenIt] != outputShapes[outIt] && thenShapes[thenIt] != 1) + IE_THROW() << errorPrefix << " and auto_broadcast='numpy' has incompatible 'Then' input and output shapes"; + + for (int elseIt = elseShapes.size() - 1, outIt = outputShapes.size() - 1; elseIt >= 0; elseIt--, outIt--) + if (elseShapes[elseIt] != outputShapes[outIt] && elseShapes[elseIt] != 1) + IE_THROW() << errorPrefix << " and auto_broadcast='numpy' has incompatible 'Else' input and output shapes"; + } + + resDims.resize(numOfDims, 1); + std::copy(std::begin(outputShapes), std::end(outputShapes), std::begin(resDims) + (numOfDims - outputShapes.size())); + if (broadcastType == SelectBroadcastType::NUMPY) { + calcOutOffset(resOffset, resDims); + + std::vector condDims(numOfDims, 1); + std::copy(std::begin(conditionShapes), std::end(conditionShapes), std::begin(condDims) + (numOfDims - conditionShapes.size())); + calcInOffset(condOffset, condDims, resDims); + + std::vector thenDims(numOfDims, 1); + std::copy(std::begin(thenShapes), std::end(thenShapes), std::begin(thenDims) + (numOfDims - thenShapes.size())); + calcInOffset(thenOffset, thenDims, resDims); + + std::vector elseDims(numOfDims, 1); + std::copy(std::begin(elseShapes), std::end(elseShapes), std::begin(elseDims) + (numOfDims - elseShapes.size())); + calcInOffset(elseOffset, elseDims, resDims); + } +} + +void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + const auto inputThenPrecision = getOriginalInputPrecisionAtPort(THEN); + const auto inputElsePrecision = getOriginalInputPrecisionAtPort(ELSE); + auto inputPrecision = inputThenPrecision; + if (inputThenPrecision == Precision::BF16 || inputElsePrecision == Precision::BF16) { + inputPrecision = Precision::BF16; + } else if (inputThenPrecision != inputElsePrecision) { + IE_THROW() << errorPrefix << " has different precisions on 'Then' and 'Else' inputs "; + } + + const auto conditionPrecision = getOriginalInputPrecisionAtPort(CONDITION); + if (conditionPrecision != Precision::BOOL && conditionPrecision != Precision::I32 && conditionPrecision != Precision::U8) + IE_THROW() << errorPrefix << " has unsupported precision: " << conditionPrecision << " on 'Condition' input"; + + const auto inputPrecisionSize = inputPrecision.size(); + if (inputPrecisionSize != 1 && inputPrecisionSize != 2 && inputPrecisionSize != 4 && inputPrecisionSize != 8) + IE_THROW() << errorPrefix << " has unsupported precision: " << inputPrecision << " on 'Then' and 'Else' inputs"; + + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, conditionPrecision}, + {TensorDescCreatorTypes::ncsp, inputPrecision}, + {TensorDescCreatorTypes::ncsp, inputPrecision}}, + {{TensorDescCreatorTypes::ncsp, inputPrecision}}, + impl_desc_type::ref_any); +} + +void MKLDNNSelectNode::calcOutOffset(std::vector& offset, const std::vector& dims) { + offset.resize(numOfDims); + int k = 1; + for (int i = dims.size() - 1; i >= 0; i--) { + offset[i] = k; + k *= dims[i]; + } +} + +void MKLDNNSelectNode::calcInOffset(std::vector& offset, const std::vector& inDims, const std::vector& outDims) { + offset.resize(numOfDims); + int k = 1; + for (int i = inDims.size() - 1; i >= 0; i--) { + offset[i] = (inDims[i] == outDims[i]) ? 
k : 0; + k *= inDims[i]; + } +} + +template +void MKLDNNSelectNode::execute_impl() { + const auto *conditionData = reinterpret_cast(getParentEdgeAt(CONDITION)->getMemoryPtr()->GetPtr()); + const auto *thenData = reinterpret_cast(getParentEdgeAt(THEN)->getMemoryPtr()->GetPtr()); + const auto *elseData = reinterpret_cast(getParentEdgeAt(ELSE)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + if (broadcastType == SelectBroadcastType::NONE) { + size_t dstDataSize = std::accumulate(begin(resDims), end(resDims), 1, std::multiplies()); + parallel_for(dstDataSize, [&](size_t i) { + dstData[i] = conditionData[i] ? thenData[i] : elseData[i]; + }); + } else { + parallel_for4d(resDims[N], resDims[C], resDims[D], resDims[H], [&](int b, int c, int d, int h) { + for (int w = 0; w < resDims[W]; w++) { + size_t indexOut = b * resOffset[N] + c * resOffset[C] + d * resOffset[D] + h * resOffset[H] + w * resOffset[W]; + size_t indexCond = b * condOffset[N] + c * condOffset[C] + d * condOffset[D] + h * condOffset[H] + w * condOffset[W]; + size_t indexThen = b * thenOffset[N] + c * thenOffset[C] + d * thenOffset[D] + h * thenOffset[H] + w * thenOffset[W]; + size_t indexElse = b * elseOffset[N] + c * elseOffset[C] + d * elseOffset[D] + h * elseOffset[H] + w * elseOffset[W]; + dstData[indexOut] = conditionData[indexCond] ? thenData[indexThen] : elseData[indexElse]; + } + }); + } +} + +void MKLDNNSelectNode::execute(mkldnn::stream strm) { + const size_t condPrecSize = getParentEdgeAt(CONDITION)->getDesc().getPrecision().size(); + const size_t inputsPrecSize = getParentEdgeAt(THEN)->getDesc().getPrecision().size(); + + switch (condPrecSize) { + case 1: { + switch (inputsPrecSize) { + case 1: { execute_impl(); break; } + case 2: { execute_impl(); break; } + case 4: { execute_impl(); break; } + case 8: { execute_impl(); break; } + default: + IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " + + std::string(getParentEdgeAt(THEN)->getDesc().getPrecision().name()); + } + break; + } + case 4: { + switch (inputsPrecSize) { + case 1: { execute_impl(); break; } + case 2: { execute_impl(); break; } + case 4: { execute_impl(); break; } + case 8: { execute_impl(); break; } + default: + IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " + + std::string(getParentEdgeAt(THEN)->getDesc().getPrecision().name()); + } + break; + } + default: { + IE_THROW() << "Select layer doesn't support 'Condition' inputs' precision: " + + std::string(getParentEdgeAt(CONDITION)->getDesc().getPrecision().name()); + } + } +} + +bool MKLDNNSelectNode::created() const { + return getType() == Select; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNSelectNode, Select) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.h new file mode 100644 index 00000000000..810b25c5333 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.h @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNSelectNode : public MKLDNNNode { +public: + MKLDNNSelectNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNSelectNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void 
createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + enum { CONDITION, THEN, ELSE, numOfInputs }; + enum { N, C, D, H, W, numOfDims }; + enum class SelectBroadcastType { + NONE, + NUMPY + }; + + SelectBroadcastType broadcastType; + std::vector resDims; + std::vector resOffset; + std::vector condOffset; + std::vector thenOffset; + std::vector elseOffset; + + std::string errorPrefix; + + void calcOutOffset(std::vector& offset, const std::vector& dims); + void calcInOffset(std::vector& offset, const std::vector& inDims, const std::vector& outDims); + template + void execute_impl(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp index a247b09f936..6d4c9a27dc4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp @@ -4,7 +4,6 @@ #include "mkldnn_softmax_node.h" -#include #include #include #include @@ -13,33 +12,31 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNSoftMaxNode::MKLDNNSoftMaxNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNSoftMaxNode::MKLDNNSoftMaxNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + auto softmaxOp = ngraph::as_type_ptr(op); + if (softmaxOp) { + axis = softmaxOp->get_axis(); + } else { + IE_THROW(NotImplemented) + << "CPU Softmax node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); + } +} void MKLDNNSoftMaxNode::getSupportedDescriptors() { if (descs.size()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - SoftMaxLayer* smLayer = dynamic_cast(getCnnLayer().get()); - if (smLayer == nullptr) - IE_THROW() << "Cannot convert softmax layer."; - if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (!getChildEdges().size()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - axis = smLayer->axis; - - if (axis >= getParentEdgeAt(0)->getDims().ndims()) { - IE_THROW() << "Incorrect axis!"; - } - if (getParentEdgeAt(0)->getDims().ndims() == 3) { MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::abc); createDescriptor({in_candidate}, {}); @@ -92,7 +89,7 @@ void MKLDNNSoftMaxNode::createPrimitive() { } bool MKLDNNSoftMaxNode::created() const { - return getType() == SoftMax; + return getType() == Softmax; } void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { @@ -127,4 +124,4 @@ void MKLDNNSoftMaxNode::createDescriptor(const std::vector& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNSoftMaxNode() override = default; void initOptimalPrimitiveDescriptor() override; @@ -25,7 +25,7 @@ public: bool created() const 
override; private: - int axis = 0; + size_t axis = 0; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp new file mode 100644 index 00000000000..4702f97e0fb --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp @@ -0,0 +1,242 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "utils/bfloat16.hpp" +#include +#include "mkldnn_space_to_batch_node.h" +#include +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +bool MKLDNNSpaceToBatchNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto spaceToBatch = std::dynamic_pointer_cast(op); + if (!spaceToBatch) { + errorMessage = "Only opset2 SpaceToBatch operation is supported"; + return false; + } + if (std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1)) == nullptr || + std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2)) == nullptr || + std::dynamic_pointer_cast(op->get_input_node_shared_ptr(3)) == nullptr) { + errorMessage = "Only constant 'block_shape', 'pads_begin', 'pads_end' are supported"; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNSpaceToBatchNode::MKLDNNSpaceToBatchNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "BatchToSpace layer with name '" + op->get_friendly_name() + "'"; + + if (op->get_input_size() != 4 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input or output edges!"; + + inDims = op->get_input_shape(0); + outDims = op->get_output_shape(0); + if (inDims.size() < 4 || inDims.size() > 5) + IE_THROW() << errorPrefix << " has unsupported 'data' input rank: " << inDims.size(); + if (inDims.size() != outDims.size()) + IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions"; + + blockShapeIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1))->cast_vector(); + padsBeginIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2))->cast_vector(); +} + +void MKLDNNSpaceToBatchNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + const auto precision = getOriginalInputPrecisionAtPort(0); + const std::set supported_precision_sizes = {1, 2, 4, 8}; + if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) + IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); + + addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::nspc, precision}}, + impl_desc_type::ref_any); + addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::ncsp, precision}}, + impl_desc_type::ref_any); + if (inDims[1] % 8 == 0) { + addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + 
{TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::nCsp8c, precision}}, + impl_desc_type::ref_any); + } + if (inDims[1] % 16 == 0) { + addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::nCsp16c, precision}}, + impl_desc_type::ref_any); + } +} + +static std::vector getShape5D(const SizeVector &shape) { + std::vector shape5D(5, 1); + for (int i = 0; i < 2; i++) { + shape5D[i] = shape[i]; + shape5D[4 - i] = shape[shape.size() - 1 - i]; + } + shape5D[2] = shape.size() == 5 ? shape[2] : shape5D[2]; + return shape5D; +} + +template +void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + const auto layout = getParentEdgeAt(0)->getDesc().getLayout(); + const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; + const auto dimsSize = inDims.size(); + + auto inShape5D = getShape5D(outDims); + auto outShape5D = getShape5D(inDims); + auto blockShape = getShape5D(blockShapeIn); + + if (layout == NHWC || layout == NDHWC) { + inShape5D.push_back(inShape5D[1]); + inShape5D.erase(inShape5D.begin() + 1); + outShape5D.push_back(outShape5D[1]); + outShape5D.erase(outShape5D.begin() + 1); + blockShape.push_back(blockShape[1]); + blockShape.erase(blockShape.begin() + 1); + } + + const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu; + const size_t blockCountInput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + const size_t blockCountOutput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + const auto blockRemainder = inShape5D[1] % blockSize; + const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; + + const size_t inSpatialStep = inShape5D[2] * inShape5D[3] * inShape5D[4]; + const size_t inBatchStep = (blocked ? blockSize * blockCountInput : inShape5D[1]) * inSpatialStep; + + const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4]; + const size_t outBatchStep = (blocked ? blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep; + + parallel_nt(0, [&](const int ithr, const int nthr) { + size_t start(0lu), end(0lu); + splitter(inShape5D[0] * inBatchStep, nthr, ithr, start, end); + std::fill(dstData + start, dstData + end, T(0)); + }); + + size_t channels = (inShape5D[1] / blockSize); + channels = channels == 0 ? 
1 : channels; + const size_t workAmount = inShape5D[0] * channels; + + parallel_nt(0, [&](const int ithr, const int nthr) { + size_t start(0lu), end(0lu); + splitter(workAmount, nthr, ithr, start, end); + std::vector indxStart(2, 0); + std::vector indxEnd(2, 0); + parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels); + parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels); + std::vector oAdd(5, 1); + std::vector begin(5, 0); + std::vector finish(5, 1); + for (size_t i0 = indxStart[0]; i0 < indxEnd[0] + 1; ++i0) { + int64_t bIdx = i0 / outShape5D[0]; + const size_t srcIdx0 = (i0 - (bIdx * outShape5D[0])) * outBatchStep; + const size_t dstIdx0 = i0 * inBatchStep; + oAdd[4] = bIdx % blockShapeIn[dimsSize - 1] - padsBeginIn[dimsSize - 1]; + bIdx /= blockShapeIn[dimsSize - 1]; + oAdd[3] = bIdx % blockShapeIn[dimsSize - 2] - padsBeginIn[dimsSize - 2]; + bIdx /= blockShapeIn[dimsSize - 2]; + oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu; + bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx; + oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1]; + if (layout == NHWC || layout == NDHWC) { + oAdd.push_back(oAdd[1]); + oAdd.erase(oAdd.begin() + 1); + } + begin[1] = (blockShape[1] - 1 - oAdd[1]) / blockShape[1] / blockSize; + finish[1] = (outShape5D[1] - 1 - oAdd[1]) / blockShape[1] / blockSize; + begin[2] = (blockShape[2] - 1 - oAdd[2]) / blockShape[2]; + finish[2] = (outShape5D[2] - 1 - oAdd[2]) / blockShape[2]; + begin[3] = (blockShape[3] - 1 - oAdd[3]) / blockShape[3]; + finish[3] = (outShape5D[3] - 1 - oAdd[3]) / blockShape[3]; + begin[4] = (blockShape[4] - 1 - oAdd[4]) / blockShape[4]; + finish[4] = (outShape5D[4] - 1 - oAdd[4]) / blockShape[4]; + const int64_t addTmpOC = blocked ? 0lu : oAdd[1]; + const int64_t addTmpOc = blocked ? oAdd[1] : 0lu; + indxStart[1] = begin[1] > indxStart[1] ? begin[1] : indxStart[1]; + const size_t lastI1 = i0 == indxEnd[0] ? (indxEnd[1] > finish[1] ? finish[1] : indxEnd[1]) : finish[1]; + for (; indxStart[1] < lastI1 + 1; ++indxStart[1]) { + const size_t block = indxStart[1] == finish[1] ? lastBlock : blockSize; + const int64_t tmpOC = indxStart[1] * blockShape[1] + addTmpOC; + const size_t srcIdx1 = srcIdx0 + tmpOC * outSpatialStep * blockSize; + const size_t dstIdx1 = dstIdx0 + indxStart[1] * inSpatialStep * blockSize; + const size_t itEnd = blocked ? ((block - 1) * blockShape[1] + oAdd[1]) / blockSize : 0lu; + for (size_t i2 = begin[2]; i2 < finish[2] + 1; ++i2) { + const int64_t tmpOd = i2 * blockShape[2] + oAdd[2]; + const size_t srcIdx2 = srcIdx1 + tmpOd * outShape5D[3] * outShape5D[4] * blockSize; + const size_t dstIdx2 = dstIdx1 + i2 * inShape5D[3] * inShape5D[4] * blockSize; + for (size_t i3 = begin[3]; i3 < finish[3] + 1; ++i3) { + const int64_t tmpOh = i3 * blockShape[3] + oAdd[3]; + const size_t srcIdx3 = srcIdx2 + tmpOh * outShape5D[4] * blockSize; + const size_t dstIdx3 = dstIdx2 + i3 * inShape5D[4] * blockSize; + for (size_t i4 = begin[4]; i4 < finish[4] + 1; ++i4) { + const int64_t tmpOw = i4 * blockShape[4] + oAdd[4]; + const size_t srcIdx4 = srcIdx3 + tmpOw * blockSize; + const size_t dstIdx4 = dstIdx3 + i4 * blockSize; + for (size_t it = 0; it < itEnd + 1; ++it) { + const size_t i5Begin = it == 0 ? 0 : (it * blockSize - 1 - oAdd[1]) / blockShape[1] + 1; + const size_t i5End = it == itEnd ? 
(block - 1) : ((it + 1) * blockSize - 1 - oAdd[1]) / blockShape[1]; + for (size_t i5 = i5Begin; i5 < i5End + 1; ++i5) { + const int64_t tmpOc = i5 * blockShape[1] + addTmpOc; + const size_t srcIdx5 = srcIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize); + const size_t dstIdx5 = dstIdx4 + i5; + dstData[dstIdx5] = srcData[srcIdx5]; + } + } + } + } + } + } + indxStart[1] = 0lu; + } + }); +} + +void MKLDNNSpaceToBatchNode::execute(mkldnn::stream strm) { + switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) { + case 1: SpaceToBatchKernel::value_type>(); break; + case 2: SpaceToBatchKernel::value_type>(); break; + case 4: SpaceToBatchKernel::value_type>(); break; + default: + IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'"; + } +} + +bool MKLDNNSpaceToBatchNode::created() const { + return getType() == SpaceToBatch; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNSpaceToBatchNode, SpaceToBatch) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.h new file mode 100644 index 00000000000..58b2b8661de --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.h @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNSpaceToBatchNode : public MKLDNNNode { +public: + MKLDNNSpaceToBatchNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNSpaceToBatchNode() override = default; + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + InferenceEngine::SizeVector inDims; + InferenceEngine::SizeVector outDims; + std::vector blockShapeIn; + std::vector padsBeginIn; + + std::string errorPrefix; + + template + void SpaceToBatchKernel(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp index 4965920d708..71eb63f62bb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp @@ -4,10 +4,11 @@ #include "mkldnn_space_to_depth_node.h" -#include #include #include #include "common/tensor_desc_creator.h" +#include +#include #include #include @@ -20,43 +21,59 @@ using namespace mkldnn; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; -MKLDNNSpaceToDepthNode::MKLDNNSpaceToDepthNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +bool MKLDNNSpaceToDepthNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto spaceToDepth = std::dynamic_pointer_cast(op); + if (!spaceToDepth) { + errorMessage = "Only opset1 SpaceToDepth operation is supported"; + return false; + } + const auto mode = spaceToDepth->get_mode(); + if (!one_of(mode, ngraph::op::v0::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST, 
ngraph::op::v0::SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST)) { + errorMessage = "Does not support mode: " + ngraph::as_string(mode); + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNSpaceToDepthNode::MKLDNNSpaceToDepthNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + const auto spaceToDepth = std::dynamic_pointer_cast(op); + + const auto modeNgraph = spaceToDepth->get_mode(); + if (modeNgraph == ngraph::op::v0::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST) { + mode = Mode::BLOCKS_FIRST; + } else if (modeNgraph == ngraph::op::v0::SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST) { + mode = Mode::DEPTH_FIRST; + } else { + THROW_ERROR << "doesn't support mode: " << ngraph::as_string(modeNgraph); + } + + blockSize = spaceToDepth->get_block_size(); + if (blockSize == 0) + THROW_ERROR << "has incorrect block_size parameter is zero!"; + + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNSpaceToDepthNode::getSupportedDescriptors() { - auto* spaceToDepthLayer = dynamic_cast(getCnnLayer().get()); - if (spaceToDepthLayer == nullptr) - THROW_ERROR << "cannot convert from CNN layer"; - - if (spaceToDepthLayer->insData[0].lock() == nullptr) - THROW_ERROR << "has nullable input data"; - - SizeVector srcDims = spaceToDepthLayer->insData[0].lock()->getTensorDesc().getDims(); + SizeVector srcDims = inDims[0].ToSizeVector(); if (srcDims.size() < 3) THROW_ERROR << "has incorrect number of input dimensions"; if (srcDims.size() > 5) THROW_ERROR << "doesn't support dimensions with rank greater than 5"; - if (spaceToDepthLayer->outData[0] == nullptr) - THROW_ERROR << "has nullable output data"; - - SizeVector dstDims = spaceToDepthLayer->outData[0]->getTensorDesc().getDims(); + SizeVector dstDims = outDims[0].ToSizeVector(); if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; - std::string modeString = spaceToDepthLayer->GetParamAsString("mode"); - if (modeString == "blocks_first") { - mode = Mode::BLOCKS_FIRST; - } else if (modeString == "depth_first") { - mode = Mode::DEPTH_FIRST; - } else { - THROW_ERROR << "doesn't support mode: " << modeString; - } - - blockSize = spaceToDepthLayer->GetParamAsUInt("block_size", 1); - if (blockSize == 0) - THROW_ERROR << "has incorrect block_size parameter is zero!"; - size_t nSpatialDims = srcDims.size() - 2; blockStep = static_cast(std::pow(blockSize, nSpatialDims)); if (dstDims[1] % blockStep) @@ -80,7 +97,7 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); auto srcDims = getParentEdgeAt(0)->getDims(); const size_t nDims = srcDims.ndims(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.h index 0d16bd171f6..7fa7b224201 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.h @@ -13,7 +13,7 @@ namespace MKLDNNPlugin { class MKLDNNSpaceToDepthNode : public MKLDNNNode { public: - MKLDNNSpaceToDepthNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, 
MKLDNNWeightsSharing::Ptr &cache); + MKLDNNSpaceToDepthNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNSpaceToDepthNode() override = default; void getSupportedDescriptors() override; @@ -22,6 +22,8 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: enum Mode { BLOCKS_FIRST = 0, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp index 316eeab315b..201bebf4e63 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp @@ -5,11 +5,11 @@ #include "mkldnn_split_node.h" #include "common/cpu_memcpy.h" #include "common/tensor_desc_creator.h" -#include #include #include #include #include +#include "utils/general_utils.h" #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " @@ -17,23 +17,55 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNSplitNode::MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +bool MKLDNNSplitNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!MKLDNNPlugin::one_of(op->get_type_info(), ngraph::op::v1::Split::type_info, ngraph::op::v1::VariadicSplit::type_info)) { + errorMessage = "Only opset1 Split and VariadicSplit operations are supported"; + return false; + } + auto axisOp = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1)); + if (!axisOp) { + errorMessage = "Constant expected as the axis input."; + return false; + } + if (op->get_input_size() > 2) { + auto splitLengthsOp = ngraph::as_type_ptr(op->get_input_node_shared_ptr(2)); + if (!splitLengthsOp) { + errorMessage = "Constant expected as the split_lengths input."; + return false; + } + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNSplitNode::MKLDNNSplitNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + if (ngraph::as_type_ptr(op)) { + INPUTS_NUM = 2; + } else if (ngraph::as_type_ptr(op)) { + INPUTS_NUM = 3; + } + + auto axisOp = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1)); + auto axis = axisOp->cast_vector()[0]; + if (axis < 0) { + axis += op->get_input_shape(0).size(); + } + if (axis >= op->get_input_shape(0).size()) { + THROW_ERROR << "Split node with name '" << op->get_friendly_name() << "' has invalid value of axis parameter: " << axis; + } + this->axis = axis; +} void MKLDNNSplitNode::getSupportedDescriptors() { - auto splitLayer = dynamic_cast(getCnnLayer().get()); - - if (splitLayer == nullptr) - THROW_ERROR << "can not convert from CNN layer."; - - if (getParentEdges().size() != 1) - THROW_ERROR << "has incorrect number of input nodes."; - if (getChildEdges().empty()) - THROW_ERROR << "has incorrect number of output nodes."; - - axis = splitLayer->_axis; - if (axis >= getParentEdgeAt(0)->getDims().ndims()) - THROW_ERROR << "has invalid value of axis parameter."; } void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { @@ -42,15 +74,6 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - if (getCnnLayer()->insData.empty()) { - THROW_ERROR << "has an empty input in the CNN layer"; - } - - auto inpData = getCnnLayer()->insData[0].lock(); - if (!inpData) { - THROW_ERROR << "input data is empty"; - } - auto srcDims = getParentEdgeAt(0)->getDims(); auto axis_size = 0; auto dstFirstDims = getChildEdgeAt(0)->getDims(); @@ -72,7 +95,8 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { if (dstFirstDims.size() != srcDims.size()) THROW_ERROR << "sizes of input blob and sum of output blobs are not equal."; - InferenceEngine::Precision inpPrecision = inpData->getPrecision(); + InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0); + const auto axisPrecision = getOriginalInputPrecisionAtPort(1); auto outPrecision = inpPrecision; // the split layer doesn't convert precisions bool dynBatchSupport = true; @@ -111,10 +135,19 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { InferenceEngine::LayerConfig config; config.dynBatchSupport = dynBatchSupport; - config.inConfs.resize(1); + config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; config.inConfs[0].desc = itr->second->createDesc(inpPrecision, srcDims.ToSizeVector()); + config.inConfs[1].inPlace = -1; + config.inConfs[1].constant = true; + config.inConfs[1].desc.setDims({1}); + config.inConfs[1].desc.setPrecision(axisPrecision); + if (INPUTS_NUM == 3) { + config.inConfs[2].desc = TensorDesc(axisPrecision, SizeVector{outDims.size()}, TensorDesc::getLayoutByDims(SizeVector{outDims.size()})); + config.inConfs[2].constant = true; + } + config.outConfs.resize(outDims.size()); std::vector outFormats; @@ -180,10 +213,18 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { InferenceEngine::LayerConfig config; config.dynBatchSupport = dynBatchSupport; - config.inConfs.resize(1); + config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; config.inConfs[0].desc = 
creatorsMap.at(TensorDescCreatorTypes::nspc)->createDesc(inpPrecision, srcDims.ToSizeVector()); + config.inConfs[1].inPlace = -1; + config.inConfs[1].constant = true; + config.inConfs[1].desc.setDims({1}); + config.inConfs[1].desc.setPrecision(axisPrecision); + if (INPUTS_NUM == 3) { + config.inConfs[2].desc = TensorDesc(axisPrecision, SizeVector{outDims.size()}, TensorDesc::getLayoutByDims(SizeVector{outDims.size()})); + config.inConfs[2].constant = true; + } config.outConfs.resize(outDims.size()); std::vector outFormats; @@ -308,13 +349,10 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { config.inConfs[i].desc.getBlockingDesc().getOrder() }); } - const auto& cnnLayer = getCnnLayer(); - if (!cnnLayer) - THROW_ERROR << "cannot be created without CNNLayer!"; if (config.outConfs.size() != outDims.size()) THROW_ERROR << "has invalid config"; size_t offset = 0; - for (size_t i = 0; i < cnnLayer->outData.size(); i++) { + for (size_t i = 0; i < outDims.size(); i++) { config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), config.outConfs[i].desc.getDims(), { config.outConfs[i].desc.getBlockingDesc().getBlockDims(), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h index af546860f39..ea9c1efcc35 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h @@ -12,9 +12,10 @@ namespace MKLDNNPlugin { class MKLDNNSplitNode : public MKLDNNNode { public: - MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNSplitNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNSplitNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void selectOptimalPrimitiveDescriptor() override; @@ -43,6 +44,8 @@ private: size_t srcDataStride; size_t countStrides; } optimizedParams; + + size_t INPUTS_NUM = 2; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp index f357fe430d3..e50296f6c23 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp @@ -7,18 +7,18 @@ #include #include -#include #include "ie_parallel.hpp" #include "caseless.hpp" #include "common/cpu_memcpy.h" #include "common/tensor_desc_creator.h" #include "utils/general_utils.h" +#include "mkldnn_input_node.h" #include #include #include #include "caseless.hpp" - +#include #define THROW_ERROR IE_THROW() << "StridedSlice layer with name '" << getName() << "' " @@ -35,26 +35,65 @@ static inline size_t parallel_init(size_t start, size_t nDims, const SizeVector& return start; } -MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto ss = std::dynamic_pointer_cast(op); + if (!ss) { + errorMessage = "Only opset1 StridedSlice operation is supported"; + return false; + } + } 
catch (...) { + return false; + } + return true; +} + +MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + const auto ss = std::dynamic_pointer_cast(op); + + const size_t nDims = std::max(inDims[DATA_ID].ndims(), outDims[0].ndims()); + + auto createMask = [&](const std::vector &origMask, const int bit = 0, bool needReverse = false) { + std::vector mask(origMask.begin(), origMask.end()); + if (needReverse) { + for (size_t i = 0; i < mask.size(); i++) + mask[i] = 1 - mask[i]; + } + for (size_t i = mask.size(); i < nDims; ++i) mask.push_back(bit); + return mask; + }; + + beginMask = createMask(ss->get_begin_mask(), 1, true); + endMask = createMask(ss->get_end_mask(), 1, true); + newAxisMask = createMask(ss->get_new_axis_mask()); + shrinkAxisMask = createMask(ss->get_shrink_axis_mask()); + + auto origEllipsisMask = ss->get_ellipsis_mask(); + for (const auto &o : origEllipsisMask) { + ellipsisMask.push_back(o); + } + if (ellipsisMask.size() == 0) { + for (size_t i = ellipsisMask.size(); i < nDims; ++i) ellipsisMask.push_back(0); + } + + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNStridedSliceNode::getSupportedDescriptors() { - auto stridedSliceLayer = getCnnLayer(); + auto isConstantNode = [](const MKLDNNNodePtr &node) { + return node->isConstant() && node->getType() == Input; + }; - if (stridedSliceLayer == nullptr) - THROW_ERROR << "cannot convert from CNN layer"; + params.parametersAreConstant = isConstantNode(getParentEdgesAtPort(BEGIN_ID)[0]->getParent()) && + isConstantNode(getParentEdgesAtPort(END_ID)[0]->getParent()); - auto inData = stridedSliceLayer->insData[DATA_ID].lock(); - auto beginData = stridedSliceLayer->insData[BEGIN_ID].lock(); - auto endData = stridedSliceLayer->insData[END_ID].lock(); - if (!inData || !beginData || !endData) - THROW_ERROR << "has nullable input data"; - - params.parametersAreConstant = CaselessEq()(getParentEdgesAtPort(BEGIN_ID)[0]->getParent()->getCnnLayer()->type, "const") && - CaselessEq()(getParentEdgesAtPort(END_ID)[0]->getParent()->getCnnLayer()->type, "const"); - - const SizeVector srcDims = inData->getTensorDesc().getDims(); - const SizeVector dstDims = stridedSliceLayer->outData[0]->getTensorDesc().getDims(); + const SizeVector srcDims = inDims[DATA_ID].ToSizeVector(); + const SizeVector dstDims = outDims[0].ToSizeVector(); const size_t nSrcDims = srcDims.size(); const size_t nDims = std::max(nSrcDims, dstDims.size()); @@ -63,43 +102,27 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { if (!getChildEdges().size()) THROW_ERROR << "has incorrect number of output edges"; - beginDims = beginData->getTensorDesc().getDims(); + beginDims = inDims[BEGIN_ID].ToSizeVector(); if (beginDims.size() != 1) THROW_ERROR << " should have begin vector with 1 dimension"; - endDims = endData->getTensorDesc().getDims(); + endDims = inDims[END_ID].ToSizeVector(); if (endDims.size() != 1) THROW_ERROR << "should have end vector with 1 dimension"; if (beginDims[0] != endDims[0]) THROW_ERROR << "should have begin vector with size equal to end vector size"; - if (stridedSliceLayer->insData.size() > STRIDE_ID) { - auto strideData = stridedSliceLayer->insData[STRIDE_ID].lock(); - if (!strideData) - THROW_ERROR << "has nullable input data"; - if (!CaselessEq()(getParentEdgesAtPort(STRIDE_ID)[0]->getParent()->getCnnLayer()->type, "const")) + if 
(inDims.size() > STRIDE_ID) { + if (!isConstantNode(getParentEdgesAtPort(STRIDE_ID)[0]->getParent())) params.parametersAreConstant = false; - strideDims = strideData->getTensorDesc().getDims(); + strideDims = inDims[STRIDE_ID].ToSizeVector(); if (strideDims.size() > 1) THROW_ERROR << "should have stride vector with 1 dimension"; if (beginDims[0] != strideDims[0]) THROW_ERROR << "should have stride vector with size equal to begin vector size"; } - auto createMask = [&](const char* maskName, std::vector& mask, const int bit = 0) { - mask = stridedSliceLayer->GetParamAsInts(maskName); - if (strcmp(maskName, "ellipsis_mask") != 0 || mask.size() == 0) { - for (size_t i = mask.size(); i < nDims; ++i) mask.push_back(bit); - } - }; - - createMask("begin_mask", beginMask, 1); - createMask("end_mask", endMask, 1); - createMask("new_axis_mask", newAxisMask); - createMask("shrink_axis_mask", shrinkAxisMask); - createMask("ellipsis_mask", ellipsisMask); - int ellipsisMaskCounter = 0; params.ellipsisPos1 = -1; for (size_t i = 0; i < ellipsisMask.size(); i++) { @@ -115,8 +138,7 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { if (params.parametersAreConstant) { auto fillingInParameters = [&](std::vector ¶meter, const size_t type, const size_t size, const int value) { - auto parentLayer = getParentEdgesAtPort(type)[0]->getParent()->getCnnLayer(); - auto blob = parentLayer->blobs["custom"]; + auto blob = std::dynamic_pointer_cast(getParentEdgesAtPort(type)[0]->getParent())->getConstBlob(); if (blob->getTensorDesc().getPrecision() != Precision::I32) THROW_ERROR << "supports only parameters input with precision I32"; const int *ptr = blob->cbuffer().as() + blob->getTensorDesc().getBlockingDesc().getOffsetPadding(); @@ -171,14 +193,14 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { return; const bool hasStrides = getParentEdges().size() > 3; - InferenceEngine::Precision dataPrecision = getCnnLayer()->insData[DATA_ID].lock()->getPrecision(); - InferenceEngine::Precision beginPrecision = getCnnLayer()->insData[BEGIN_ID].lock()->getPrecision(); + InferenceEngine::Precision dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID); + InferenceEngine::Precision beginPrecision = getOriginalInputPrecisionAtPort(BEGIN_ID); auto beginDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(beginPrecision); - InferenceEngine::Precision endPrecision = getCnnLayer()->insData[END_ID].lock()->getPrecision(); + InferenceEngine::Precision endPrecision = getOriginalInputPrecisionAtPort(END_ID); auto endDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(endPrecision); InferenceEngine::Precision stridePrecision; if (hasStrides) - stridePrecision = getCnnLayer()->insData[STRIDE_ID].lock()->getPrecision(); + stridePrecision = getOriginalInputPrecisionAtPort(STRIDE_ID); auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); auto dstDims = getChildEdgeAt(0)->getDims(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.h index 577757791ff..33da479d30a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.h @@ -13,7 +13,7 @@ namespace MKLDNNPlugin { class MKLDNNStridedSliceNode : public MKLDNNNode { public: - MKLDNNStridedSliceNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNStridedSliceNode(const std::shared_ptr& op, const 
mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNStridedSliceNode() override = default; void getSupportedDescriptors() override; @@ -25,6 +25,8 @@ public: return false; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: void stridedSliceV(); void stridedSlice(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index 67e8c09dfec..c9a53c79e07 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -4,13 +4,12 @@ #include "mkldnn_tensoriterator_node.h" -#include -#include #include #include #include -#include #include +#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -18,28 +17,24 @@ using namespace InferenceEngine::details; namespace MKLDNNPlugin { -static InferenceEngine::LayerConfig make_plain_config(const InferenceEngine::CNNLayerPtr &layer) { - using namespace InferenceEngine; +static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptr& op) { + InferenceEngine::LayerConfig config; - LayerConfig config; + for (size_t i = 0; i < op->get_input_size(); i++) { + const auto& dims = op->get_input_shape(i); + const auto prec = InferenceEngine::details::convertPrecision(op->get_input_element_type(i)); - for (const auto &in_w : layer->insData) { - const auto in = in_w.lock(); - - const auto dims = in->getDims(); - const auto prec = in->getPrecision(); - - DataConfig data_conf {}; - data_conf.desc = TensorDesc { prec, dims, TensorDesc::getLayoutByDims(dims) }; + InferenceEngine::DataConfig data_conf {}; + data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; config.inConfs.push_back(data_conf); } - for (const auto &out : layer->outData) { - const auto dims = out->getDims(); - const auto prec = out->getPrecision(); + for (size_t i = 0; i < op->get_output_size(); i++) { + const auto& dims = op->get_output_shape(i); + const auto prec = InferenceEngine::details::convertPrecision(op->get_output_element_type(i)); - DataConfig data_conf {}; - data_conf.desc = TensorDesc { prec, dims, TensorDesc::getLayoutByDims(dims) }; + InferenceEngine::DataConfig data_conf {}; + data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; config.outConfs.push_back(data_conf); } @@ -50,7 +45,7 @@ static InferenceEngine::LayerConfig make_plain_config(const InferenceEngine::CNN class PortIteratorHelper : public PortMapHelper { public: PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src, - const InferenceEngine::TensorIterator::PortMap &slice_rule, const mkldnn::engine& eng) + const PortMap &slice_rule, const mkldnn::engine& eng) : sliced_src(sliced_src) { const auto &full_blob = sliced_src ? from : to; const auto &part_blob = !sliced_src ? 
from : to; @@ -186,52 +181,214 @@ private: } // namespace MKLDNNPlugin -MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +int getNumIteration(const std::shared_ptr& op, const std::vector& inputPortMap, const std::vector& outputPortMap) { + const auto isIterable = [](const PortMap& rule) { return rule.axis != -1; }; + + const auto getNumIterations = [](const PortMap& rule, const std::vector& dimensions) -> int { + const auto axis = rule.axis; + if (axis < 0 || static_cast(axis) >= dimensions.size()) { + IE_THROW() << R"(: Invalid "axis" value in an iteration component: )" + << rule.axis << ", dimensions number = " << dimensions.size() << " (out of range)"; + } + const auto space = dimensions[axis]; + const int start = static_cast((rule.start < 0 ? (space + 1) : 0) + rule.start); + const int end = static_cast((rule.end < 0 ? (space + 1) : 0) + rule.end); + + const auto stride = rule.stride; + if (stride == 0) { + IE_THROW() << R"(: Invalid "stride" value in an iteration component: )" << rule.stride << " (infinite loop)"; + } + const auto step = std::abs(stride); + + const auto src = stride < 0 ? end : start; + const auto dst = stride < 0 ? start : end; + const auto length = dst - src; + if (src < 0 || src >= dst || dst > static_cast(space) || length < step) { + IE_THROW() << R"(: Invalid "start"/"stride"/"end" values in an iteration component)" + << ": \"start\" = " << rule.start << ", \"stride\" = " << rule.stride << ", \"end\" = " << rule.end; + } + + if (length % step != 0) { + IE_THROW() << ": Each iteration must be the same size: length (" << length << ") is not divisible by step (" << step << ")"; + } + + return static_cast(length / step); + }; + + + int numIterations = 1; + bool isDefault = true; + for (const auto& rule : inputPortMap) { + if (!isIterable(rule)) { + continue; + } + + if (rule.from < 0 || rule.from >= static_cast(op->get_input_size())) { + IE_THROW() << R"(: Invalid "from" value: "from" = )" << rule.from + << " inputs number = " << op->get_input_size() << " (out of range)"; + } + + const auto currentNumIterations = getNumIterations(rule, op->get_input_shape(rule.from)); + if (isDefault) { + isDefault = false; + numIterations = currentNumIterations; + } else if (numIterations != currentNumIterations) { + IE_THROW() << ": There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations; + } + } + + for (const auto& rule : outputPortMap) { + if (!isIterable(rule)) { + continue; + } + + if (rule.from < 0 || rule.from >= static_cast(op->get_output_size())) { + IE_THROW() << R"(: Invalid "from" value: "from" = )" << rule.from + << " inputs number = " << op->get_output_size() << " (out of range)"; + } + + const auto currentNumIterations = getNumIterations(rule, op->get_output_shape(rule.from)); + if (isDefault) { + isDefault = false; + numIterations = currentNumIterations; + } else if (numIterations != currentNumIterations) { + IE_THROW() << ": There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations; + } + } + + return numIterations; +} + +bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!one_of(op->get_type_info(), + ngraph::op::v0::TensorIterator::type_info, + ngraph::op::v5::Loop::type_info)) { + errorMessage = "Only opset1 TensorIterator or opset5 Loop operations are 
supported."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache), ngraphOp(op) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNTensorIteratorNode::getSupportedDescriptors() { - auto *ti = dynamic_cast(getCnnLayer().get()); - if (ti == nullptr) - IE_THROW() << "Cannot convert to TensorIterator layer."; + auto tiOp = std::dynamic_pointer_cast(ngraphOp); + const std::shared_ptr body = tiOp->get_function(); + sub_graph.CreateGraph(body, ext_mng, weightCache); - n_iter = getNumIteration(*ti); - sub_graph.CreateGraph(ti->body, ext_mng, weightCache); - - // Try to detect inputs and outputs by indexes - const auto &in_map = sub_graph.GetInputNodes(); - for (const auto &in_data : ti->body.inputs) { - if (in_data->getName() == "const_holder") continue; - - auto &in_node = in_map.at(in_data->getName()); - auto in_mem = in_node->getChildEdgeAt(0)->getMemoryPtr(); - input_mem.push_back(in_mem); + const auto &inMap = sub_graph.GetInputNodesMap(); + for (const auto ¶m : tiOp->get_function()->get_parameters()) { + auto inNode = inMap.find(param->get_friendly_name()); + if (inNode != inMap.end()) { + auto inMem = inNode->second->getChildEdgeAt(0)->getMemoryPtr(); + input_mem.push_back(inMem); + } } - // Assume that order of outputs in original TI and produces sub_graph is same - const auto &out_vec = sub_graph.GetOutputNodes(); - for (size_t i = 0; i < out_vec.size(); i++) { - auto out_mem = out_vec[i]->getParentEdgeAt(0)->getMemoryPtr(); - output_mem.push_back(out_mem); + const auto &outMap = sub_graph.GetOutputNodesMap(); + for (const auto &out : tiOp->get_function()->get_results()) { + auto prev = out->get_input_node_shared_ptr(0); + std::string inputID = prev->get_friendly_name(); + if (prev->get_output_size() > 1) { + inputID += "." 
+ std::to_string(out->get_input_source_output(0).get_index()); + } + auto outNode = outMap.find(inputID); + if (outNode != outMap.end()) { + auto outMem = outNode->second->getParentEdgeAt(0)->getMemoryPtr(); + output_mem.push_back(outMem); + } } + + // Port map: outputs + for (const auto& desc : tiOp->get_output_descriptions()) { + auto body_output_idx = desc->m_body_value_index; + + std::string type_name = desc->get_type_info().name; + if (type_name == "ConcatOutputDescription") { + auto output_desc = ::ngraph::as_type_ptr(desc); + IE_ASSERT(output_desc != nullptr); + + outputPortMap.emplace_back(PortMap { + static_cast(output_desc->m_output_index), static_cast(body_output_idx), + static_cast(output_desc->m_axis), static_cast(output_desc->m_stride), + static_cast(output_desc->m_start), static_cast(output_desc->m_end), + static_cast(output_desc->m_part_size)}); + } else if (type_name == "BodyOutputDescription") { + auto output_desc = ::ngraph::as_type_ptr(desc); + IE_ASSERT(output_desc != nullptr); + + outputPortMap.emplace_back(PortMap { + static_cast(output_desc->m_output_index), static_cast(body_output_idx), -1, 1, 0, -1, 1}); + } else { + IE_THROW() << "Incorrect type of the output description."; + } + } + + // Port map : inputs and back edges + for (const auto& desc : tiOp->get_input_descriptions()) { + auto body_input_index = desc->m_body_parameter_index; + + if (const auto slice_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(slice_desc->m_input_index), static_cast(body_input_index), + static_cast(slice_desc->m_axis), static_cast(slice_desc->m_stride), + static_cast(slice_desc->m_start), static_cast(slice_desc->m_end), + static_cast(slice_desc->m_part_size)}); + } else if (const auto merge_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(merge_desc->m_input_index), static_cast(body_input_index), -1, 1, 0, -1, 1}); + + auto body_output_idx = merge_desc->m_body_value_index; + + backEdges.emplace_back(PortMap { + static_cast(body_output_idx), static_cast(body_input_index), -1, 1, 0, -1, 1}); + } else if (const auto inv_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(inv_desc->m_input_index), static_cast(body_input_index), -1, 1, 0, -1, 1}); + } else { + IE_THROW() << "Incorrect type of the input description."; + } + } + + n_iter = getNumIteration(ngraphOp, inputPortMap, outputPortMap); + + if (const auto loopOp = std::dynamic_pointer_cast(ngraphOp)) { + auto spec_port = loopOp->get_special_body_ports(); + if (spec_port.current_iteration_input_idx != -1) { + loopBodyCurrentIterationIdx.push_back(spec_port.current_iteration_input_idx); + } + if (spec_port.body_condition_output_idx != -1) { + loopBodyConditionOutputIdx = spec_port.body_condition_output_idx; + } + loopTripCountIdx = 0; + loopExecutionConditionIdx = 1; + } + + config = make_plain_config(ngraphOp); } void MKLDNNTensorIteratorNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto config = make_plain_config(getCnnLayer()); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } void MKLDNNTensorIteratorNode::createPrimitive() { - auto ti = dynamic_cast(getCnnLayer().get()); - if (ti == nullptr) - IE_THROW() << "Cannot convert to TensorIterator layer."; - const auto &eng = getEngine(); - for (auto map_rule : ti->input_port_map) { + for (auto map_rule : inputPortMap) { auto &from_mem = 
getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &to_mem = input_mem[map_rule.to]; @@ -241,7 +398,7 @@ void MKLDNNTensorIteratorNode::createPrimitive() { before_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, true, map_rule, eng)); } - for (auto map_rule : ti->output_port_map) { + for (auto map_rule : outputPortMap) { auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &from_mem = output_mem[map_rule.to]; @@ -251,7 +408,7 @@ void MKLDNNTensorIteratorNode::createPrimitive() { after_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, false, map_rule, eng)); } - for (auto map_rule : ti->back_edges) { + for (auto map_rule : backEdges) { auto from_mem = output_mem[map_rule.from]; auto to_mem = input_mem[map_rule.to]; @@ -259,38 +416,29 @@ void MKLDNNTensorIteratorNode::createPrimitive() { } // special purpose ports - constexpr auto key_cur_iter_port = "loop_body_current_iteration_idx"; - constexpr auto key_cond_port = "loop_body_condition_output_idx"; - constexpr auto key_trip_count_port = "loop_trip_count_idx"; - constexpr auto key_init_cond_port = "loop_execution_condition_idx"; - - auto iter_idx_ports = ti->GetParamAsInts(key_cur_iter_port, {}); - for (auto idx : iter_idx_ports) { + for (auto idx : loopBodyCurrentIterationIdx) { auto to_mem = input_mem[idx]; before_mappers.emplace_back(new IterCountPortHelper(to_mem, eng)); } - auto condition_port_idx = ti->GetParamAsInt(key_cond_port, -1); - if (condition_port_idx == -1) { + if (loopBodyConditionOutputIdx == -1) { continue_cond_check.reset(new staticValueCheck(true)); // always true } else { - auto mem = output_mem[condition_port_idx]; + auto mem = output_mem[loopBodyConditionOutputIdx]; continue_cond_check.reset(new asBoolCheck(mem)); } - auto trip_count_port_idx = ti->GetParamAsInt(key_trip_count_port, -1); - if (trip_count_port_idx == -1) { + if (loopTripCountIdx == -1) { trip_count_check.reset(new staticValueCheck(n_iter)); // use statically calculated num of iteration } else { - auto mem = getParentEdgesAtPort(trip_count_port_idx)[0]->getMemoryPtr(); + auto mem = getParentEdgesAtPort(loopTripCountIdx)[0]->getMemoryPtr(); trip_count_check.reset(new asIntCheck(mem)); } - auto init_cond_port_idx = ti->GetParamAsInt(key_init_cond_port, -1); - if (init_cond_port_idx == -1) { + if (loopExecutionConditionIdx == -1) { initial_cond_check.reset(new staticValueCheck(true)); } else { - auto mem = getParentEdgesAtPort(init_cond_port_idx)[0]->getMemoryPtr(); + auto mem = getParentEdgesAtPort(loopExecutionConditionIdx)[0]->getMemoryPtr(); initial_cond_check.reset(new asBoolCheck(mem)); } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h index a1ba870044d..3f3dd96e6f1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -13,6 +12,19 @@ namespace MKLDNNPlugin { +struct PortMap { + // Data map rule + int from; /**< Index of external data from ins/outs fields of node */ + int to; /**< Index of internal data in iterator body */ + + // Iteration rule + int axis; /**< Axis to iterate throught */ + int stride; /**< Stride to iterate throught */ + int start; /**< Start index of iteration range */ + int end; /**< Last index of iteration range */ + int part_size; /**< Part size which will be transfered to body subnetwork */ 
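    // Illustrative example with assumed values: a SliceInputDescription that feeds body parameter 1
    // from external input 0, taking one element per iteration along axis 2, maps to
    //   PortMap{ /*from*/ 0, /*to*/ 1, /*axis*/ 2, /*stride*/ 1, /*start*/ 0, /*end*/ -1, /*part_size*/ 1 };
    // non-sliced rules (merged/invariant inputs, plain body outputs) use axis == -1, which
    // getNumIteration() treats as "not iterable".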
+}; + /** * Functor interface to perform some action with pointed tensors (captured in constructor) * Generally it's read, write or move data from specified tensors. @@ -45,9 +57,10 @@ protected: class MKLDNNTensorIteratorNode : public MKLDNNNode { public: - MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNTensorIteratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNTensorIteratorNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void initSupportedPrimitiveDescriptors() override; void getSupportedDescriptors() override; void createPrimitive() override; @@ -73,6 +86,19 @@ private: trip_count_check, /// < Perform check of trip count value. value >= -1 initial_cond_check, /// < Perform check of initial continue condition value. value [0, 1] continue_cond_check; /// < Perform check of continue condition value of body. value [0, 1] + + std::vector inputPortMap; //!< Input ports map + std::vector outputPortMap; //!< Output ports map + std::vector backEdges; //!< Back edges map + + std::vector loopBodyCurrentIterationIdx; + int loopBodyConditionOutputIdx = -1; + int loopTripCountIdx = -1; + int loopExecutionConditionIdx = -1; + + InferenceEngine::LayerConfig config; + + const std::shared_ptr ngraphOp; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp index e0d1b616e2d..663f3a376f8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp @@ -3,43 +3,87 @@ // #include "mkldnn_tile_node.h" -#include #include #include #include #include "common/cpu_memcpy.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNTileNode::MKLDNNTileNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +bool MKLDNNTileNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto tile = std::dynamic_pointer_cast(op); + if (!tile) { + errorMessage = "Only opset1 Tile operation is supported"; + return false; + } + if (tile->get_input_shape(TILE_INPUT).size() != tile->get_input_shape(TILE_REPEATS)[0]) { + errorMessage = "Doesn't support inputs with different ranks"; + return false; + } + const auto repeatsNode = std::dynamic_pointer_cast(tile->get_input_node_shared_ptr(TILE_REPEATS)); + if (repeatsNode == nullptr) { + errorMessage = "Only const 'repeats' input is supported"; + return false; + } + const auto repeats = repeatsNode->cast_vector(); + if (std::count_if(repeats.begin(), repeats.end(), [](int64_t x) { return x > 1; }) > 1) { + errorMessage = "Doesn't support 'repeats' with more than one specified axis"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNTileNode::MKLDNNTileNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Tile node with name '" + getName() + "'"; + + const auto tile = std::dynamic_pointer_cast(op); + const auto repeatsNode = std::dynamic_pointer_cast(tile->get_input_node_shared_ptr(TILE_REPEATS)); + const auto repeats = repeatsNode->cast_vector(); + // At this moment CPU plug-in supports tiling only per single axis + // This behavoiur is guaranteed by ConvertTileToSeqTiles + for (size_t i = 0; i < repeats.size(); i++) { + if (repeats[i] > 1) { + axis = i; + tiles = repeats[i]; + break; + } + } + noTiling = axis == -1; + if (axis >= static_cast(tile->get_input_shape(TILE_INPUT).size())) + IE_THROW() << errorPrefix << " has incorrect tiling axis: " << axis; + if (tiles < 1 && !noTiling) + IE_THROW() << errorPrefix << " has incorrect 'repeats' value: " << tiles; + } else { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNTileNode::getSupportedDescriptors() { - auto * tileLayer = dynamic_cast(getCnnLayer().get()); - - if (tileLayer == nullptr) - IE_THROW() << "Cannot convert tile layer."; - - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); + if (getParentEdges().size() != 2) + IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (!getChildEdges().size()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - axis = tileLayer->axis; - tiles = tileLayer->tiles; + IE_THROW() << errorPrefix << " has incorrect number of output edges"; } void MKLDNNTileNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(TILE_INPUT); if (precision.size() != sizeof(PrecisionTrait::value_type) && precision.size() != sizeof(PrecisionTrait::value_type) && precision.size() != sizeof(PrecisionTrait::value_type)) { - IE_THROW() << "Layer Tile has unsupported input precision: " << precision; + IE_THROW() << errorPrefix << " has unsupported input precision: " << precision; } auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); @@ -48,14 +92,12 @@ void MKLDNNTileNode::initSupportedPrimitiveDescriptors() { InferenceEngine::LayerConfig config; config.dynBatchSupport = true; - config.inConfs.resize(1); + config.inConfs.resize(2); config.outConfs.resize(1); - config.inConfs[0].inPlace = -1; - config.inConfs[0].constant = false; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, fmt); - config.outConfs[0].inPlace = -1; - config.outConfs[0].constant = false; + config.inConfs[TILE_INPUT].desc = MKLDNNMemoryDesc(getParentEdgeAt(TILE_INPUT)->getDims(), inputDataType, fmt); + config.inConfs[TILE_REPEATS].desc = MKLDNNMemoryDesc(getParentEdgeAt(TILE_REPEATS)->getDims(), memory::data_type::s32, memory::format_tag::x); config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), inputDataType, fmt); + config.outConfs[0].inPlace = noTiling ? 
0 : -1; supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, fmt}); } @@ -63,16 +105,18 @@ void MKLDNNTileNode::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) - IE_THROW() << "Destination memory didn't allocate."; + IE_THROW() << errorPrefix << " can't get destination memory"; if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) - IE_THROW() << "Input memory didn't allocate."; + IE_THROW() << errorPrefix << " can't get input memory"; if (getSelectedPrimitiveDescriptor() == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); + IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor"; } void MKLDNNTileNode::execute(mkldnn::stream strm) { + if (noTiling) { + return; + } + auto& srcMemory = getParentEdgeAt(0)->getMemory(); const uint8_t* src_ptr = reinterpret_cast(srcMemory.GetPtr()); @@ -118,4 +162,5 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { bool MKLDNNTileNode::created() const { return getType() == Tile; } + REG_MKLDNN_PRIM_FOR(MKLDNNTileNode, Tile); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h index e3247c2bfbe..b414f9be7ce 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h @@ -12,7 +12,7 @@ namespace MKLDNNPlugin { class MKLDNNTileNode : public MKLDNNNode { public: - MKLDNNTileNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNTileNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNTileNode() override = default; void getSupportedDescriptors() override; @@ -21,9 +21,17 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: - int axis = 0; + static const size_t TILE_INPUT = 0; + static const size_t TILE_REPEATS = 1; + + int axis = -1; int tiles = 0; + bool noTiling = false; + + std::string errorPrefix; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp similarity index 79% rename from inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp rename to inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp index a1575166167..f3189ba6332 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp @@ -2,62 +2,77 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "mkldnn_permute_node.h" -#include +#include "mkldnn_transpose_node.h" + +#include #include #include #include "ie_parallel.hpp" -#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNPermuteNode::MKLDNNPermuteNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +bool MKLDNNTransposeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto transposeOp = ngraph::as_type_ptr(op); + if 
(!transposeOp) { + errorMessage = "Node is not an instance of the Transpose operation."; + return false; + } -void MKLDNNPermuteNode::getSupportedDescriptors() { - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (!getChildEdges().size()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); + auto orderOp = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1)); + if (!orderOp) { + errorMessage = "Constant expected as the second input."; + return false; + } + } catch (...) { + return false; + } + return true; +} - auto& layer = getCnnLayer(); - if (!layer) { - IE_THROW() << "Cannot get CNNLayer."; +MKLDNNTransposeNode::MKLDNNTransposeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; } - order.clear(); - std::vector layerOrder = layer->GetParamAsInts("order"); - for (auto ord : layerOrder) - order.push_back(static_cast(ord)); + auto orderOp = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1)); + order = orderOp->cast_vector(); if (order.empty()) { - size_t rank = getParentEdgeAt(0)->getDims().ndims(); - for (size_t i = 1; i <= rank; ++i) { + size_t rank = op->get_input_shape(0).size(); + for (size_t i = 1lu; i <= rank; ++i) { order.emplace_back(rank - i); } } } -void MKLDNNPermuteNode::initSupportedPrimitiveDescriptors() { +void MKLDNNTransposeNode::getSupportedDescriptors() { +} + +void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - prec = getCnnLayer()->insData[0].lock()->getPrecision(); + prec = getOriginalInputPrecisionAtPort(0); auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); + auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1)); InferenceEngine::LayerConfig config; config.dynBatchSupport = true; - config.inConfs.resize(1); + config.inConfs.resize(2); config.outConfs.resize(1); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; + config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), inputOrderDataType, memory::format_tag::x); if (getParentEdgeAt(0)->getDims().ndims() == 4) { config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); @@ -108,7 +123,7 @@ void MKLDNNPermuteNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNPermuteNode::createPrimitive() { +void MKLDNNTransposeNode::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) @@ -133,7 +148,7 @@ void MKLDNNPermuteNode::createPrimitive() { permuteKernel = std::unique_ptr(new PermuteKernel(params)); } -static void permute_to_0231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); // Supports only NCHW to NHWC @@ -167,7 
+182,7 @@ static void permute_to_0231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_0213(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0213(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); int block_size = 1; @@ -193,7 +208,7 @@ static void permute_to_0213(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_0312(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0312(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -211,7 +226,7 @@ static void permute_to_0312(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& } template -static void permute_to_014253(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_014253(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -247,7 +262,7 @@ static void permute_to_014253(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPt } } -static void permute_to_3012(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_3012(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -275,7 +290,7 @@ static void permute_to_3012(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& } } -static void permute_to_021(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_021(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -299,7 +314,7 @@ static void permute_to_021(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_034152(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_034152(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -335,7 +350,7 @@ static void permute_to_034152(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPt } } -static void permute_to_0132(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0132(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); int src_block_size = 1; @@ -361,7 +376,7 @@ static void permute_to_0132(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_03142(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_03142(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -393,7 +408,7 @@ static void permute_to_03142(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr } } -static void permute_to_1203(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_1203(int MB, 
MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -410,7 +425,7 @@ static void permute_to_1203(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_02134(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_02134(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -437,7 +452,7 @@ static void permute_to_02134(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -static void permute_to_02431(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_02431(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -464,7 +479,7 @@ static void permute_to_02431(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -static void permute_to_04231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_04231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -491,7 +506,7 @@ static void permute_to_04231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -static void permute_to_102(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_102(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -515,7 +530,7 @@ static void permute_to_102(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_02341(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_02341(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -542,7 +557,7 @@ static void permute_to_02341(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -static void permute_to_04123(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_04123(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -569,61 +584,61 @@ static void permute_to_04123(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -const std::multimap MKLDNNPermuteNode::OptimizedCases = { - {{0, 2, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_0231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +const std::multimap MKLDNNTransposeNode::OptimizedCases = { + {{0, 2, 3, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_0231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return true; })}, // NCHW -> NHWC case - {{0, 1, 4, 2, 5, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_014253<2, 2>, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 1, 4, 2, 5, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_014253<2, 2>, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat() && srcMemPtr->GetDims()[2] == 2 && 
srcMemPtr->GetDims()[3] == 2; })}, // Dense upsample convolution case (scale = 2) - {{0, 1, 4, 2, 5, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_014253<0, 0>, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 1, 4, 2, 5, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_014253<0, 0>, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, // Dense upsample convolution case (generic) - {{3, 0, 1, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_3012, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{3, 0, 1, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_3012, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat() && MB == srcMemPtr->GetDims()[0]; })}, // LPR case - {{0, 2, 1, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_0213, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 1, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_0213, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, // shufflenet - {{0, 2, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_021, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_021, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, // self attention block - {{0, 3, 4, 1, 5, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_034152, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 3, 4, 1, 5, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_034152, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, // learning-to-see-in-the-dark-sony - {{0, 1, 3, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_0132, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 1, 3, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_0132, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return true; })}, - {{0, 3, 1, 4, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_03142, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 3, 1, 4, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_03142, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{1, 2, 0, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_1203, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{1, 2, 0, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_1203, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat() && MB == srcMemPtr->GetDims()[0]; })}, - {{0, 2, 1, 3, 4}, MKLDNNPermuteNode::PermuteImpl(permute_to_02134, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 1, 3, 4}, MKLDNNTransposeNode::TransposeImpl(transpose_to_02134, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{0, 2, 4, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_02431, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 4, 3, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_02431, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{0, 
4, 2, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_04231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 4, 2, 3, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_04231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{0, 3, 1, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_0312, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 3, 1, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_0312, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{1, 0, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_102, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{1, 0, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_102, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat() && MB == srcMemPtr->GetDims()[0]; })}, - {{0, 2, 3, 4, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_02341, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 3, 4, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_02341, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{0, 4, 1, 2, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_04123, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 4, 1, 2, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_04123, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, }; -void MKLDNNPermuteNode::execute(mkldnn::stream strm) { +void MKLDNNTransposeNode::execute(mkldnn::stream strm) { auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); int MB = batchToProcess(); @@ -642,7 +657,7 @@ void MKLDNNPermuteNode::execute(mkldnn::stream strm) { permuteKernel->execute(srcData, dstData, MB); } -bool MKLDNNPermuteNode::created() const { - return getType() == Permute; +bool MKLDNNTransposeNode::created() const { + return getType() == Transpose; } -REG_MKLDNN_PRIM_FOR(MKLDNNPermuteNode, Permute); +REG_MKLDNN_PRIM_FOR(MKLDNNTransposeNode, Transpose); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.h similarity index 65% rename from inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h rename to inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.h index 9c5f00024d1..f1d291bc8a4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.h @@ -15,11 +15,12 @@ namespace MKLDNNPlugin { -class MKLDNNPermuteNode : public MKLDNNNode { +class MKLDNNTransposeNode : public MKLDNNNode { public: - MKLDNNPermuteNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNPermuteNode() override = default; + MKLDNNTransposeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNTransposeNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void createPrimitive() override; @@ -37,16 +38,16 @@ private: InferenceEngine::SizeVector order; 
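    // Filled from the constant "order" input in the constructor; when that constant is empty the
    // constructor falls back to the reversed dimension order (e.g. a 4D input yields {3, 2, 1, 0}).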
InferenceEngine::Precision prec; - typedef std::function permuteImpl; + typedef std::function transposeImpl; typedef std::function isApplicable; - struct PermuteImpl { - PermuteImpl(permuteImpl f0, isApplicable f1): execute(std::move(f0)), isValidParams(std::move(f1)) {} + struct TransposeImpl { + TransposeImpl(transposeImpl f0, isApplicable f1): execute(std::move(f0)), isValidParams(std::move(f1)) {} - permuteImpl execute; + transposeImpl execute; isApplicable isValidParams; }; - static const std::multimap OptimizedCases; + static const std::multimap OptimizedCases; std::unique_ptr permuteKernel; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/non_max_suppression.cpp b/inference-engine/src/mkldnn_plugin/nodes/non_max_suppression.cpp index afa8b8f3d4a..b5884a234cb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/non_max_suppression.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/non_max_suppression.cpp @@ -12,126 +12,111 @@ #include #include #include "ie_parallel.hpp" +#include +#include "utils/general_utils.h" +#include namespace InferenceEngine { namespace Extensions { namespace Cpu { +using namespace MKLDNNPlugin; + class NonMaxSuppressionImpl: public ExtLayerBase { public: - explicit NonMaxSuppressionImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - logPrefix = "NMS layer with name '" + layer->name + "' "; - if (layer->insData.size() < 2 || layer->insData.size() > 6) - IE_THROW() << logPrefix << "has incorrect number of input edges: " << layer->insData.size(); + const auto nms = std::dynamic_pointer_cast(op); + if (!nms) { + errorMessage = "Only internal NonMaxSuppression operation is supported"; + return false; + } + } catch (...) { + return false; + } + return true; + } - if (layer->outData.size() < 1 || layer->outData.size() > 3) - IE_THROW() << logPrefix << "has incorrect number of output edges: " << layer->outData.size(); - - // TODO: remove legacy attribute presentation after migration on opset1 - if (layer->CheckParamPresence("center_point_box")) { - bool center_point_box = layer->GetParamAsBool("center_point_box", false); - boxEncodingType = center_point_box ? boxEncoding::CENTER : boxEncoding::CORNER; - } else if (layer->CheckParamPresence("box_encoding")) { - std::string boxEncAttr = layer->GetParamAsString("box_encoding", "corner"); - if (boxEncAttr == "corner") { - boxEncodingType = boxEncoding::CORNER; - } else if (boxEncAttr == "center") { - boxEncodingType = boxEncoding::CENTER; - } else { - IE_THROW() << logPrefix << "has unsupported 'box_encoding' attribute: " << boxEncAttr; - } + explicit NonMaxSuppressionImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; } - sort_result_descending = layer->GetParamAsBool("sort_result_descending", true); + errorPrefix = "NMS layer with name '" + op->get_friendly_name() + "' "; + const auto nms = std::dynamic_pointer_cast(op); + + if (nms->get_input_size() < 2 || nms->get_input_size() > 6) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << nms->get_input_size(); + + if (nms->get_output_size() < 1 || nms->get_output_size() > 3) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << nms->get_output_size(); + + boxEncodingType = nms->m_center_point_box ? 
boxEncoding::CENTER : boxEncoding::CORNER; + + sort_result_descending = nms->m_sort_result_descending; const std::vector supportedFloatPrecision = {Precision::FP32, Precision::BF16}; const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; - auto boxesDataPtr = layer->insData[NMS_BOXES].lock(); - if (boxesDataPtr == nullptr) { - IE_THROW() << logPrefix << "has nullable 'boxes' input"; - } - checkPrecision(boxesDataPtr, supportedFloatPrecision, "boxes", inType); - const SizeVector &boxes_dims = boxesDataPtr->getTensorDesc().getDims(); + checkPrecision(op->get_input_element_type(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + const SizeVector &boxes_dims = op->get_input_shape(NMS_BOXES); num_batches = boxes_dims[0]; num_boxes = boxes_dims[1]; if (boxes_dims.size() != 3) - IE_THROW() << logPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); + IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); if (boxes_dims[2] != 4) - IE_THROW() << logPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; + IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; - - auto scoresDataPtr = layer->insData[NMS_SCORES].lock(); - if (scoresDataPtr == nullptr) { - IE_THROW() << logPrefix << "has nullable 'scores' input"; - } - checkPrecision(scoresDataPtr, supportedFloatPrecision, "scores", inType); - const SizeVector &scores_dims = scoresDataPtr->getTensorDesc().getDims(); + checkPrecision(op->get_input_element_type(NMS_SCORES), supportedFloatPrecision, "scores", inType); + const SizeVector &scores_dims = op->get_input_shape(NMS_SCORES); num_classes = scores_dims[1]; if (scores_dims.size() != 3) - IE_THROW() << logPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); + IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); if (num_batches != scores_dims[0]) - IE_THROW() << logPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; + IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; if (num_boxes != scores_dims[2]) - IE_THROW() << logPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; + IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; numFiltBox.resize(num_batches); for (size_t i = 0; i < numFiltBox.size(); i++) numFiltBox[i].resize(num_classes); - if (layer->insData.size() > NMS_MAXOUTPUTBOXESPERCLASS) { - const std::vector supportedPrecision = {Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32, - Precision::U32, Precision::I64, Precision::U64}; - check1DInput(layer->insData[NMS_MAXOUTPUTBOXESPERCLASS], supportedPrecision, "max_output_boxes_per_class"); + const std::vector supportedPrecision = {Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32, + Precision::U32, Precision::I64, Precision::U64}; + check1DInput(op, supportedPrecision, "max_output_boxes_per_class", NMS_MAXOUTPUTBOXESPERCLASS); + check1DInput(op, supportedFloatPrecision, "iou_threshold", NMS_IOUTHRESHOLD); + check1DInput(op, supportedFloatPrecision, "score_threshold", NMS_SCORETHRESHOLD); + + if (op->get_input_size() > NMS_SOFTNMSSIGMA) { + check1DInput(op, supportedFloatPrecision, "soft_nms_sigma", NMS_SOFTNMSSIGMA); } - if (layer->insData.size() > NMS_IOUTHRESHOLD) { - check1DInput(layer->insData[NMS_IOUTHRESHOLD], supportedFloatPrecision, "iou_threshold"); - } - - if 
(layer->insData.size() > NMS_SCORETHRESHOLD) { - check1DInput(layer->insData[NMS_SCORETHRESHOLD], supportedFloatPrecision, "score_threshold"); - } - - if (layer->insData.size() > NMS_SOFTNMSSIGMA) { - check1DInput(layer->insData[NMS_SOFTNMSSIGMA], supportedFloatPrecision, "soft_nms_sigma"); - } - - checkOutput(layer->outData[NMS_SELECTEDINDICES], supportedIntOutputPrecision, "selected_indices"); - - if (layer->outData.size() > NMS_SELECTEDSCORES) { - checkOutput(layer->outData[NMS_SELECTEDSCORES], supportedFloatPrecision, "selected_scores"); - } - - if (layer->outData.size() > NMS_VALIDOUTPUTS) { - checkPrecision(layer->outData[NMS_VALIDOUTPUTS], supportedIntOutputPrecision, "valid_outputs", outType); - const SizeVector &valid_outputs_dims = layer->outData[NMS_VALIDOUTPUTS]->getTensorDesc().getDims(); - if (valid_outputs_dims.size() != 1) - IE_THROW() << logPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size(); - if (valid_outputs_dims[0] != 1) - IE_THROW() << logPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[1]; - } + checkOutput(op, supportedIntOutputPrecision, "selected_indices", NMS_SELECTEDINDICES); + checkOutput(op, supportedFloatPrecision, "selected_scores", NMS_SELECTEDSCORES); + checkPrecision(op->get_input_element_type(NMS_VALIDOUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType); + const SizeVector &valid_outputs_dims = op->get_input_shape(NMS_VALIDOUTPUTS); + if (valid_outputs_dims.size() != 1) + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size(); + if (valid_outputs_dims[0] != 1) + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[1]; LayerConfig config; - for (size_t i = 0; i < layer->insData.size(); i++) { + for (size_t i = 0; i < op->get_input_size(); i++) { DataConfig inConfig; Precision inPrecision = i == NMS_MAXOUTPUTBOXESPERCLASS ? Precision::I32 : Precision::FP32; - auto validDataPtr = layer->insData[i].lock(); - if (validDataPtr == nullptr) { - IE_THROW() << logPrefix << "has nullable " << i << "th input"; - } - const SizeVector& inDims = validDataPtr->getTensorDesc().getDims(); + const SizeVector& inDims = op->get_input_shape(i); inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims)); config.inConfs.push_back(inConfig); } - for (size_t i = 0; i < layer->outData.size(); i++) { + for (size_t i = 0; i < op->get_output_size(); i++) { DataConfig outConfig; Precision outPrecision = i == NMS_SELECTEDSCORES ? 
Precision::FP32 : Precision::I32; - const SizeVector& outDims = layer->outData[i]->getTensorDesc().getDims(); + const SizeVector& outDims = op->get_output_shape(i); outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims)); config.outConfs.push_back(outConfig); } @@ -439,44 +424,40 @@ private: float soft_nms_sigma = 0.0f; float scale = 1.f; + std::string errorPrefix; + std::vector> numFiltBox; const std::string inType = "input", outType = "output"; - std::string logPrefix; - void checkPrecision(const DataPtr &dataPtr, const std::vector precList, const std::string name, const std::string type) { - const TensorDesc &tensorDesc = dataPtr->getTensorDesc(); - if (std::find(precList.begin(), precList.end(), tensorDesc.getPrecision()) == precList.end()) - IE_THROW() << logPrefix << " has unsupported '" << name << "' " << type << " precision: " << tensorDesc.getPrecision(); + void checkPrecision(const ngraph::element::Type &ngPrec, const std::vector precList, const std::string name, const std::string type) { + const auto prec = details::convertPrecision(ngPrec); + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; } - void check1DInput(const DataWeakPtr &dataPtr, const std::vector precList, const std::string name) { - auto lockDataPtr = dataPtr.lock(); - if (lockDataPtr == nullptr) { - IE_THROW() << logPrefix << "has nullable '" << name << "' input"; - } + void check1DInput(const std::shared_ptr& op, const std::vector precList, const std::string name, const size_t port) { + checkPrecision(op->get_input_element_type(port), precList, name, inType); - checkPrecision(lockDataPtr, precList, name, inType); - - const SizeVector &dims = lockDataPtr->getTensorDesc().getDims(); + const SizeVector &dims = op->get_input_shape(port); if (dims.size() != 0 && dims.size() != 1) - IE_THROW() << logPrefix << "has unsupported '" << name << "' input rank: " << dims.size(); + IE_THROW() << errorPrefix << "has unsupported '" << name << "' input rank: " << dims.size(); if (dims.size() == 1) if (dims[0] != 1) - IE_THROW() << logPrefix << "has unsupported '" << name << "' input 1st dimension size: " << dims[0]; + IE_THROW() << errorPrefix << "has unsupported '" << name << "' input 1st dimension size: " << dims[0]; } - void checkOutput(const DataPtr &dataPtr, const std::vector precList, const std::string name) { - checkPrecision(dataPtr, precList, name, outType); + void checkOutput(const std::shared_ptr& op, const std::vector precList, const std::string name, const size_t port) { + checkPrecision(op->get_output_element_type(port), precList, name, outType); - const SizeVector &dims = dataPtr->getTensorDesc().getDims(); + const SizeVector &dims = op->get_output_shape(port); if (dims.size() != 2) - IE_THROW() << logPrefix << "has unsupported '" << name << "' output rank: " << dims.size(); + IE_THROW() << errorPrefix << "has unsupported '" << name << "' output rank: " << dims.size(); if (dims[1] != 3) - IE_THROW() << logPrefix << "has unsupported '" << name << "' output 2nd dimension size: " << dims[1]; + IE_THROW() << errorPrefix << "has unsupported '" << name << "' output 2nd dimension size: " << dims[1]; } }; -REG_FACTORY_FOR(NonMaxSuppressionImpl, NonMaxSuppression); +REG_FACTORY_FOR(NonMaxSuppressionImpl, NonMaxSuppressionIEInternal); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/one_hot.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/one_hot.cpp deleted file mode 100644 index 46a6713211f..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/one_hot.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" -#include "ie_parallel.hpp" -#include "common/tensor_desc_creator.h" -#include "common/cpu_memcpy.h" -#include "utils/bfloat16.hpp" -#include - -#include - -using namespace MKLDNNPlugin; - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class OneHotImpl: public ExtLayerBase { - typedef PrecisionTrait::value_type in_type; - -public: - explicit OneHotImpl(const CNNLayer* layer) { - try { - depth = layer->GetParamAsUInt("depth"); - axis = layer->GetParamAsInt("axis", -1); - src_dims = layer->insData[0].lock()->getTensorDesc().getDims(); - dst_dims = layer->outData[0]->getTensorDesc().getDims(); - - int output_dims_size = dst_dims.size(); - if (layer->CheckParamPresence("axis") && - (-1 > axis || axis >= output_dims_size)) { - IE_THROW() << "The value of " << layer->name << " layer axis parameter must be between -1 <= axis < "\ - << output_dims_size << ", but actually it is " << axis; - } - - if (!( ((1 + src_dims.size()) == dst_dims.size()) || - (src_dims.size() == 1 && dst_dims.size() == 1 && dst_dims[0] == depth && src_dims[0] == 1))) - IE_THROW() << layer->name << " Incorrect number of input/output dimensions!"; - - // check a precision of the input tensor - auto input_precision = layer->insData[0].lock()->getTensorDesc().getPrecision(); - if (input_precision != Precision::I32) { - IE_THROW() << layer->name << " Incorrect input precision for the input. Only I32 is supported!"; - } - output_precision = layer->outData[0]->getTensorDesc().getPrecision(); - if (Precision::BF16 == output_precision) { - MKLDNNPlugin::bfloat16_t bf16_on_value = layer->GetParamAsFloat("on_value", 1.0f); - MKLDNNPlugin::bfloat16_t bf16_off_value = layer->GetParamAsFloat("off_value", 0.0f); - cpu_memcpy(&on_value, &bf16_on_value, sizeof(MKLDNNPlugin::bfloat16_t)); - cpu_memcpy(&off_value, &bf16_off_value, sizeof(MKLDNNPlugin::bfloat16_t)); - } else if (output_precision.is_float()) { - float float_on_value = layer->GetParamAsFloat("on_value", 1.0f); - float float_off_value = layer->GetParamAsFloat("off_value", 0.0f); - cpu_memcpy(&on_value, &float_on_value, sizeof(float)); - cpu_memcpy(&off_value, &float_off_value, sizeof(float)); - } else { - on_value = layer->GetParamAsInt("on_value", 1); - off_value = layer->GetParamAsInt("off_value", 0); - } - - LayerConfig config; - DataConfig dataConfig; - config.dynBatchSupport = false; - - auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); - - dataConfig.desc = creators.at(MKLDNNPlugin::TensorDescCreatorTypes::ncsp)->createDesc(input_precision, src_dims); - config.inConfs.push_back(dataConfig); - - dataConfig.desc = creators.at(MKLDNNPlugin::TensorDescCreatorTypes::ncsp)->createDesc(output_precision, dst_dims); - config.outConfs.push_back(dataConfig); - - confs.push_back(config); - } catch (InferenceEngine::Exception& ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - try { - std::size_t prefix_size = 1; - auto input_dims = inputs.front()->getTensorDesc().getDims(); - - std::size_t actual_axis = (axis == -1) ? 
src_dims.size() : axis; - for (size_t i = 0; i < actual_axis; ++i) - prefix_size *= input_dims[i]; - - std::size_t suffix_size = inputs.front()->size() / prefix_size; - - OneHotContext ctx = {this, inputs[0], outputs[0], prefix_size, suffix_size, false}; - OV_SWITCH(MKLDNNPlugin, OneHotExecute, ctx, output_precision.size(), - OV_CASE(sizeof(uint32_t), uint32_t), - OV_CASE(sizeof(uint16_t), uint16_t), - OV_CASE(sizeof(uint8_t), uint8_t)) - - if (!ctx.executed) { - snprintf(resp->msg, sizeof(resp->msg), "Unsupported output data type %s.", output_precision.name()); - return GENERAL_ERROR; - } - } - catch (const std::exception& excp) { - snprintf(resp->msg, sizeof(resp->msg), "%s", excp.what()); - return GENERAL_ERROR; - } - catch(...) { - return GENERAL_ERROR; - } - return OK; - } - -private: - template - void one_hot(const Blob::Ptr& input, const Blob::Ptr& output, size_t prefix_size, size_t suffix_size) { - const auto *src_data = input->cbuffer().as(); - auto *dst_data = output->buffer().as(); - - // fill the output with off_value - std::size_t dst_size = prefix_size * depth * suffix_size; - std::fill(dst_data, dst_data + dst_size, static_cast(off_value)); - - // set on_value at needed locations - auto on_val = static_cast(on_value); - parallel_for(prefix_size, [&](std::size_t prefix_idx) { - const in_type* src_dataPtr = &src_data[prefix_idx * suffix_size]; - out_type* dst_dataPtr = &dst_data[prefix_idx * depth * suffix_size]; - for (std::size_t suffix_idx = 0; suffix_idx < suffix_size; ++suffix_idx, ++src_dataPtr, ++dst_dataPtr) { - auto v = static_cast(*src_dataPtr); - if (v < depth) { - dst_dataPtr[v * suffix_size] = on_val; - } - } - }); - } - - struct OneHotContext { - OneHotImpl* nodePtr; - Blob::Ptr input; - Blob::Ptr output; - size_t prefix_size; - size_t suffix_size; - bool executed; - }; - - template - struct OneHotExecute { - void operator()(OneHotContext & ctx) { - ctx.nodePtr->one_hot(ctx.input, ctx.output, ctx.prefix_size, ctx.suffix_size); - ctx.executed = true; - } - }; - - uint32_t depth; - uint32_t on_value; - uint32_t off_value; - int32_t axis = -1; - SizeVector src_dims; - SizeVector dst_dims; - - Precision output_precision; -}; - -REG_FACTORY_FOR(OneHotImpl, OneHot); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/powerfile.cpp b/inference-engine/src/mkldnn_plugin/nodes/powerfile.cpp deleted file mode 100644 index 04d19c21a8a..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/powerfile.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class PowerFileImpl: public ExtLayerBase { -public: - explicit PowerFileImpl(const CNNLayer* layer) { - try { - if (layer->insData.size() != 1 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; - - // TODO: load this from some file or as blob? 
- shift_.push_back(1); - shift_.push_back(0); - shift_.push_back(0); - shift_.push_back(0); - shift_.push_back(1); - shift_.push_back(0); - - addConfig(layer, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - if (inputs.size() != 1 || outputs.empty()) { - if (resp) { - std::string errorMsg = "Incorrect number of input or output edges!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - float* src_data = inputs[0]->buffer(); - float* dst_data = outputs[0]->buffer(); - - for (size_t i = 0; i < inputs[0]->size(); i++) { - size_t shift_idx = i % shift_.size(); - dst_data[i] = src_data[i] + shift_[shift_idx]; - } - return OK; - } - -private: - std::vector shift_; -}; - -REG_FACTORY_FOR(PowerFileImpl, PowerFile); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/priorbox.cpp b/inference-engine/src/mkldnn_plugin/nodes/priorbox.cpp deleted file mode 100644 index f98cd07660f..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/priorbox.cpp +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class PriorBoxImpl: public ExtLayerBase { - static inline float clip_great(float x, float threshold) { - return x < threshold ? x : threshold; - } - - static inline float clip_less(float x, float threshold) { - return x > threshold ? 
x : threshold; - } - -public: - explicit PriorBoxImpl(const CNNLayer *layer) { - try { - if (layer->insData.size() != 2 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; - - if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4 || - layer->insData[1].lock()->getTensorDesc().getDims().size() != 4) - IE_THROW() << "PriorBox supports only 4D blobs!"; - - _offset = layer->GetParamAsFloat("offset"); - _step = layer->GetParamAsFloat("step", 0); - _min_sizes = layer->GetParamAsFloats("min_size", {}); - _max_sizes = layer->GetParamAsFloats("max_size", {}); - _flip = layer->GetParamAsBool("flip", false); - _clip = layer->GetParamAsBool("clip", false); - _scale_all_sizes = layer->GetParamAsBool("scale_all_sizes", true); - - _fixed_sizes = layer->GetParamAsFloats("fixed_size", {}); - _fixed_ratios = layer->GetParamAsFloats("fixed_ratio", {}); - _densitys = layer->GetParamAsFloats("density", {}); - - bool exist; - - _aspect_ratios.push_back(1.0f); - - const std::vector aspect_ratios = layer->GetParamAsFloats("aspect_ratio", {}); - - for (float aspect_ratio : aspect_ratios) { - exist = false; - - if (std::fabs(aspect_ratio) < std::numeric_limits::epsilon()) { - IE_THROW() << "aspect_ratio param can't be equal to zero"; - } - - for (float _aspect_ratio : _aspect_ratios) { - if (fabs(aspect_ratio - _aspect_ratio) < 1e-6) { - exist = true; - break; - } - } - - if (exist) { - continue; - } - - _aspect_ratios.push_back(aspect_ratio); - - if (_flip) { - _aspect_ratios.push_back(1.0f / aspect_ratio); - } - } - - if (_scale_all_sizes) { - _num_priors = static_cast(_aspect_ratios.size() * _min_sizes.size()); - } else { - _num_priors = static_cast(_aspect_ratios.size() + _min_sizes.size() - 1); - } - - if (_fixed_sizes.size() > 0) { - _num_priors = static_cast(_aspect_ratios.size() * _fixed_sizes.size()); - } - - if (_densitys.size() > 0) { - for (size_t i = 0; i < _densitys.size(); ++i) { - if (_fixed_ratios.size() > 0) { - _num_priors += (_fixed_ratios.size()) * (static_cast(pow(_densitys[i], 2)) - 1); - } else { - _num_priors += (_aspect_ratios.size()) * (static_cast(pow(_densitys[i], 2)) - 1); - } - } - } - - for (auto it = _max_sizes.begin(); it != _max_sizes.end(); it++) { - _num_priors += 1; - } - - const std::vector variance = layer->GetParamAsFloats("variance", {}); - - if (variance.size() == 1 || variance.size() == 4) { - for (float i : variance) { - if (i < 0) { - IE_THROW() << "Variance must be > 0."; - } - - _variance.push_back(i); - } - } else if (variance.empty()) { - _variance.push_back(0.1f); - } else { - IE_THROW() << "Wrong number of variance values. 
Not less than 1 and more than 4 variance values."; - } - - addConfig(layer, {{ConfLayout::ANY, true}, {ConfLayout::ANY, true}}, {{ConfLayout::PLN, true, -1, Precision::FP32}}); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode init(LayerConfig& config, ResponseDesc *resp) noexcept override { - return OK; - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - if (inputs.size() != 2 || outputs.empty()) { - if (resp) { - std::string errorMsg = "Incorrect number of input or output edges!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - auto& dataMemPtr = inputs[0]; - auto& imageMemPtr = inputs[1]; - auto& dstMemPtr = outputs[0]; - SizeVector _data_dims = dataMemPtr->getTensorDesc().getDims(); - SizeVector _image_dims = imageMemPtr->getTensorDesc().getDims(); - const int W = _data_dims[3]; - const int H = _data_dims[2]; - const int IW = _image_dims[3]; - const int IH = _image_dims[2]; - - const int OH = dstMemPtr->getTensorDesc().getDims()[2]; - const int OW = (dstMemPtr->getTensorDesc().getDims().size() == 3) ? 1 : dstMemPtr->getTensorDesc().getDims()[3]; - - float step_x = 0.0f; - float step_y = 0.0f; - - if (_step == 0) { - step_x = static_cast(IW) / W; - step_y = static_cast(IH) / H; - } else { - step_x = _step; - step_y = _step; - } - - float IWI = 1.0f / static_cast(IW); - float IHI = 1.0f / static_cast(IH); - - float* dst_data = dstMemPtr->buffer(); - - int idx = 0; - float center_x = 0.0f; - float center_y = 0.0f; - - float box_width; - float box_height; - - for (int h = 0; h < H; ++h) { - for (int w = 0; w < W; ++w) { - if (_step == 0) { - center_x = (w + 0.5f) * step_x; - center_y = (h + 0.5f) * step_y; - } else { - center_x = (_offset + w) * _step; - center_y = (_offset + h) * _step; - } - - for (size_t s = 0; s < _fixed_sizes.size(); ++s) { - size_t fixed_size_ = static_cast(_fixed_sizes[s]); - box_width = box_height = fixed_size_ * 0.5f; - - if (_fixed_ratios.size() > 0) { - for (float ar : _fixed_ratios) { - size_t density_ = static_cast(_densitys[s]); - int shift = static_cast(_fixed_sizes[s] / density_); - ar = sqrt(ar); - float box_width_ratio = _fixed_sizes[s] * 0.5f * ar; - float box_height_ratio = _fixed_sizes[s] * 0.5f / ar; - for (size_t r = 0; r < density_; ++r) { - for (size_t c = 0; c < density_; ++c) { - float center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift; - float center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift; - - // xmin - dst_data[idx++] = clip_less((center_x_temp - box_width_ratio) * IWI, 0); - // ymin - dst_data[idx++] = clip_less((center_y_temp - box_height_ratio) * IHI, 0); - // xmax - dst_data[idx++] = clip_great((center_x_temp + box_width_ratio) * IWI, 1); - // ymax - dst_data[idx++] = clip_great((center_y_temp + box_height_ratio) * IHI, 1); - } - } - } - } else { - if (_densitys.size() > 0) { - int density_ = static_cast(_densitys[s]); - int shift = static_cast(_fixed_sizes[s] / density_); - for (int r = 0; r < density_; ++r) { - for (int c = 0; c < density_; ++c) { - float center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift; - float center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift; - - // xmin - dst_data[idx++] = clip_less((center_x_temp - box_width) * IWI, 0); - // ymin - dst_data[idx++] = clip_less((center_y_temp - box_height) * IHI, 0); - // xmax - dst_data[idx++] = clip_great((center_x_temp + box_width) * IWI, 1); - // ymax - 
dst_data[idx++] = clip_great((center_y_temp + box_height) * IHI, 1); - } - } - } - // Rest of priors - for (float ar : _aspect_ratios) { - if (fabs(ar - 1.) < 1e-6) { - continue; - } - - int density_ = static_cast(_densitys[s]); - int shift = static_cast(_fixed_sizes[s] / density_); - ar = sqrt(ar); - float box_width_ratio = _fixed_sizes[s] * 0.5f * ar; - float box_height_ratio = _fixed_sizes[s] * 0.5f / ar; - for (int r = 0; r < density_; ++r) { - for (int c = 0; c < density_; ++c) { - float center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift; - float center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift; - // xmin - dst_data[idx++] = clip_less((center_x_temp - box_width_ratio) * IWI, 0); - // ymin - dst_data[idx++] = clip_less((center_y_temp - box_height_ratio) * IHI, 0); - // xmax - dst_data[idx++] = clip_great((center_x_temp + box_width_ratio) * IWI, 1); - // ymax - dst_data[idx++] = clip_great((center_y_temp + box_height_ratio) * IHI, 1); - } - } - } - } - } - - for (size_t msIdx = 0; msIdx < _min_sizes.size(); msIdx++) { - box_width = _min_sizes[msIdx] * 0.5f; - box_height = _min_sizes[msIdx] * 0.5f; - - dst_data[idx++] = (center_x - box_width) * IWI; - dst_data[idx++] = (center_y - box_height) * IHI; - dst_data[idx++] = (center_x + box_width) * IWI; - dst_data[idx++] = (center_y + box_height) * IHI; - - if (_max_sizes.size() > msIdx) { - box_width = box_height = sqrt(_min_sizes[msIdx] * _max_sizes[msIdx]) * 0.5f; - - dst_data[idx++] = (center_x - box_width) * IWI; - dst_data[idx++] = (center_y - box_height) * IHI; - dst_data[idx++] = (center_x + box_width) * IWI; - dst_data[idx++] = (center_y + box_height) * IHI; - } - - if (_scale_all_sizes || (!_scale_all_sizes && (msIdx == _min_sizes.size() - 1))) { - size_t sIdx = _scale_all_sizes ? 
msIdx : 0; - for (float ar : _aspect_ratios) { - if (fabs(ar - 1.0f) < 1e-6) { - continue; - } - - ar = sqrt(ar); - box_width = _min_sizes[sIdx] * 0.5f * ar; - box_height = _min_sizes[sIdx] * 0.5f / ar; - - dst_data[idx++] = (center_x - box_width) * IWI; - dst_data[idx++] = (center_y - box_height) * IHI; - dst_data[idx++] = (center_x + box_width) * IWI; - dst_data[idx++] = (center_y + box_height) * IHI; - } - } - } - } - } - - if (_clip) { - parallel_for((H * W * _num_priors * 4), [&](size_t i) { - dst_data[i] = (std::min)((std::max)(dst_data[i], 0.0f), 1.0f); - }); - } - - size_t channel_size = OH * OW; - dst_data += channel_size; - if (_variance.size() == 1) { - parallel_for(channel_size, [&](size_t i) { - dst_data[i] = _variance[0]; - }); - } else { - parallel_for((H * W * _num_priors), [&](size_t i) { - for (size_t j = 0; j < 4; ++j) { - dst_data[i * 4 + j] = _variance[j]; - } - }); - } - return OK; - } - -private: - float _offset = 0; - float _step = 0; - std::vector _min_sizes; - std::vector _max_sizes; - bool _flip = false; - bool _clip = false; - bool _scale_all_sizes = true; - - std::vector _fixed_sizes; - std::vector _fixed_ratios; - std::vector _densitys; - - std::vector _aspect_ratios; - std::vector _variance; - - int _num_priors = 0; -}; - -REG_FACTORY_FOR(PriorBoxImpl, PriorBox); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/priorbox_clustered.cpp b/inference-engine/src/mkldnn_plugin/nodes/priorbox_clustered.cpp deleted file mode 100644 index 0d38b4faebe..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/priorbox_clustered.cpp +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class PriorBoxClusteredImpl: public ExtLayerBase { -public: - explicit PriorBoxClusteredImpl(const CNNLayer* layer) { - try { - if (layer->insData.size() != 2 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; - - if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4 || - layer->insData[1].lock()->getTensorDesc().getDims().size() != 4) - IE_THROW() << "PriorBoxClustered supports only 4D blobs!"; - - widths_ = layer->GetParamAsFloats("width", {}); - heights_ = layer->GetParamAsFloats("height", {}); - clip_ = layer->GetParamAsInt("clip"); - variance_ = layer->GetParamAsFloats("variance", {}); - img_h_ = layer->GetParamAsInt("img_h", 0); - img_w_ = layer->GetParamAsInt("img_w", 0); - step_ = layer->GetParamAsFloat("step", 0); - step_h_ = layer->GetParamAsFloat("step_h", 0); - step_w_ = layer->GetParamAsFloat("step_w", 0); - offset_ = layer->GetParamAsFloat("offset"); - - addConfig(layer, {{ConfLayout::PLN, true}, {ConfLayout::PLN, true}}, {{ConfLayout::PLN, true, -1, Precision::FP32}}); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode init(LayerConfig& config, ResponseDesc *resp) noexcept override { - return OK; - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - int num_priors_ = widths_.size(); - - if (variance_.empty()) - variance_.push_back(0.1f); - - // Execute - const int layer_width = inputs[0]->getTensorDesc().getDims()[3]; - const int layer_height = inputs[0]->getTensorDesc().getDims()[2]; - - int img_width = img_w_ == 0 ? 
inputs[1]->getTensorDesc().getDims()[3] : img_w_; - int img_height = img_h_ == 0 ? inputs[1]->getTensorDesc().getDims()[2] : img_h_; - - float step_w = step_w_ == 0 ? step_ : step_w_; - float step_h = step_h_ == 0 ? step_ : step_h_; - if (step_w == 0 && step_h == 0) { - step_w = static_cast(img_width) / layer_width; - step_h = static_cast(img_height) / layer_height; - } - - auto *top_data_0 = outputs[0]->buffer().as(); - float *top_data_1 = top_data_0 + outputs[0]->getTensorDesc().getDims()[2]; - int var_size = variance_.size(); - - for (int h = 0; h < layer_height; ++h) { - for (int w = 0; w < layer_width; ++w) { - float center_x = (w + offset_) * step_w; - float center_y = (h + offset_) * step_h; - - for (int s = 0; s < num_priors_; ++s) { - float box_width = widths_[s]; - float box_height = heights_[s]; - - float xmin = (center_x - box_width / 2.0f) / img_width; - float ymin = (center_y - box_height / 2.0f) / img_height; - float xmax = (center_x + box_width / 2.0f) / img_width; - float ymax = (center_y + box_height / 2.0f) / img_height; - - if (clip_) { - xmin = (std::min)((std::max)(xmin, 0.0f), 1.0f); - ymin = (std::min)((std::max)(ymin, 0.0f), 1.0f); - xmax = (std::min)((std::max)(xmax, 0.0f), 1.0f); - ymax = (std::min)((std::max)(ymax, 0.0f), 1.0f); - } - - top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 0] = xmin; - top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 1] = ymin; - top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 2] = xmax; - top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 3] = ymax; - - for (int j = 0; j < var_size; j++) - top_data_1[h * layer_width * num_priors_ * var_size + w * num_priors_ * var_size + - s * var_size + - j] = variance_[j]; - } - } - } - return OK; - } - -private: - std::vector widths_; - std::vector heights_; - std::vector variance_; - int clip_; - int img_h_; - int img_w_; - float step_; - float step_h_; - float step_w_; - float offset_; -}; - -REG_FACTORY_FOR(PriorBoxClusteredImpl, PriorBoxClustered); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/priorgridgenerator_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/priorgridgenerator_onnx.cpp index e49e4b572dd..662c24086ee 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/priorgridgenerator_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/priorgridgenerator_onnx.cpp @@ -6,6 +6,9 @@ #include #include #include +#include + +using MKLDNNPlugin::TensorDescCreatorTypes; namespace InferenceEngine { namespace Extensions { @@ -26,27 +29,49 @@ private: // Outputs: // priors_grid, shape [m, 4] -public: - explicit ExperimentalDetectronPriorGridGeneratorImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() > 3 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; + const auto priorGridGen = std::dynamic_pointer_cast(op); + if (!priorGridGen) { + errorMessage = "Only opset6 ExperimentalDetectronPriorGridGenerator operation is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; + } - if (layer->insData[INPUT_PRIORS].lock()->getTensorDesc().getDims().size() != 2 || - (layer->insData.size() > INPUT_FEATUREMAP && - layer->insData[INPUT_FEATUREMAP].lock()->getTensorDesc().getDims().size() != 4) || - (layer->insData.size() > INPUT_IMAGE && - layer->insData[INPUT_IMAGE].lock()->getTensorDesc().getDims().size() != 4)) - IE_THROW() << "Unsupported shape of input blobs!"; + std::string errorPrefix; - grid_w_ = layer->GetParamAsInt("w", 0); - grid_h_ = layer->GetParamAsInt("h", 0); - stride_h_ = layer->GetParamAsFloat("stride_y", 0); - stride_w_ = layer->GetParamAsFloat("stride_x", 0); +public: + explicit ExperimentalDetectronPriorGridGeneratorImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - addConfig(layer, - {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::ANY), DataConfigurator(ConfLayout::ANY)}, - {DataConfigurator(ConfLayout::PLN, Precision::FP32)}); + errorPrefix = "ExperimentalDetectronPriorGridGenerator layer with name '" + op->get_friendly_name() + "'"; + const auto priorGridGen = std::dynamic_pointer_cast(op); + if (op->get_input_size() != 3 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + if (op->get_input_shape(INPUT_PRIORS).size() != 2 || + op->get_input_shape(INPUT_FEATUREMAP).size() != 4 || + op->get_input_shape(INPUT_IMAGE).size() != 4) + IE_THROW() << errorPrefix << " has unsupported input shape"; + + const auto &attr = priorGridGen->get_attrs(); + grid_w_ = attr.w; + grid_h_ = attr.h; + stride_h_ = attr.stride_y; + stride_w_ = attr.stride_x; + + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/proposal.cpp b/inference-engine/src/mkldnn_plugin/nodes/proposal.cpp index 037587228c8..7dd69d7a007 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/proposal.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/proposal.cpp @@ -4,15 +4,20 @@ #include "base.hpp" -#include "proposal_imp.hpp" #include #include #include +#include "common/tensor_desc_creator.h" +#include "proposal_imp.hpp" +#include + namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + static std::vector generate_anchors(proposal_conf &conf) { auto base_size = conf.base_size_; @@ -75,32 +80,52 @@ std::vector generate_anchors(proposal_conf &conf) { class ProposalImpl : public ExtLayerBase { public: - explicit ProposalImpl(const CNNLayer *layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 3 || (layer->outData.size() != 1 && layer->outData.size() != 2)) - IE_THROW() << "Incorrect number of input/output edges!"; + auto proposal0Op = ngraph::as_type_ptr(op); + auto proposal4Op = ngraph::as_type_ptr(op); + if (!proposal0Op && !proposal4Op) { + errorMessage = "Node is not an instance of the Proposal from the operations set v0 or v4."; + return false; + } + auto proposalOp = std::dynamic_pointer_cast(op); + // [NM] TODO: Enable after fix Issue: 53750 + // if (proposalOp->get_attrs().framework != "tensorflow" && 
!proposalOp->get_attrs().framework.empty()) { + // errorMessage = "Unsupported framework attribute: " + proposalOp->get_attrs().framework; + // return false; + // } + } catch (...) { + return false; + } + return true; + } - if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4) - IE_THROW() << "Proposal supports only 4D blobs!"; + explicit ProposalImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - conf.feat_stride_ = static_cast(layer->GetParamAsInt("feat_stride")); - conf.base_size_ = static_cast(layer->GetParamAsInt("base_size")); - conf.min_size_ = static_cast(layer->GetParamAsInt("min_size")); - conf.pre_nms_topn_ = layer->GetParamAsInt("pre_nms_topn"); - conf.post_nms_topn_ = layer->GetParamAsInt("post_nms_topn"); - conf.nms_thresh_ = layer->GetParamAsFloat("nms_thresh"); - conf.box_coordinate_scale_ = layer->GetParamAsFloat("box_coordinate_scale", 1.0); - conf.box_size_scale_ = layer->GetParamAsFloat("box_size_scale", 1.0); - conf.scales = layer->GetParamAsFloats("scale", {}); - conf.ratios = layer->GetParamAsFloats("ratio", {}); - conf.normalize_ = layer->GetParamAsBool("normalize", false); - conf.clip_before_nms = layer->GetParamAsBool("clip_before_nms", true); - conf.clip_after_nms = layer->GetParamAsBool("clip_after_nms", false); + auto proposalOp = std::dynamic_pointer_cast(op); + auto proposalAttrs = proposalOp->get_attrs(); + conf.feat_stride_ = proposalAttrs.feat_stride; + conf.base_size_ = proposalAttrs.base_size; + conf.min_size_ = proposalAttrs.min_size; + conf.pre_nms_topn_ = proposalAttrs.pre_nms_topn; + conf.post_nms_topn_ = proposalAttrs.post_nms_topn; + conf.nms_thresh_ = proposalAttrs.nms_thresh; + conf.box_coordinate_scale_ = proposalAttrs.box_coordinate_scale; + conf.box_size_scale_ = proposalAttrs.box_size_scale; + conf.scales = proposalAttrs.scale; + conf.ratios = proposalAttrs.ratio; + conf.normalize_ = proposalAttrs.normalize; + conf.clip_before_nms = proposalAttrs.clip_before_nms; + conf.clip_after_nms = proposalAttrs.clip_after_nms; conf.anchors_shape_0 = conf.ratios.size() * conf.scales.size(); - std::string framework_ = layer->GetParamAsString("framework", ""); - if (framework_ == "tensorflow") { + if (proposalAttrs.framework == "tensorflow") { conf.coordinates_offset = 0.0f; conf.initial_clip = true; conf.shift_anchors = true; @@ -117,66 +142,60 @@ public: anchors = generate_anchors(conf); roi_indices.resize(conf.post_nms_topn_); - store_prob = layer->outData.size() == 2; + store_prob = op->get_output_size() == 2; if (store_prob) { - addConfig(layer, {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32)}, - {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32)}); + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}); } else { - addConfig(layer, {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32)}, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}); + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, 
Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); } } catch (const InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } StatusCode execute(std::vector &inputs, std::vector &outputs, ResponseDesc *resp) noexcept override { try { - if (inputs.size() != 3 || outputs.empty()) { - IE_THROW() << "Incorrect number of input or output edges!"; - } - - // Prepare memory - const float *p_bottom_item = inputs[0]->buffer(); - const float *p_d_anchor_item = inputs[1]->buffer(); - const float *p_img_info_cpu = inputs[2]->buffer(); - float *p_roi_item = outputs[0]->buffer(); - float *p_prob_item = nullptr; + const float* probabilitiesData = inputs[PROBABILITIES_IN_IDX]->cbuffer().as() + + inputs[PROBABILITIES_IN_IDX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + const float* anchorsData = inputs[ANCHORS_IN_IDX]->cbuffer().as() + + inputs[ANCHORS_IN_IDX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + const float* imgInfoData = inputs[IMG_INFO_IN_IDX]->cbuffer().as() + + inputs[IMG_INFO_IN_IDX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + float* outRoiData = outputs[ROI_OUT_IDX]->buffer().as() + + outputs[ROI_OUT_IDX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + float* outProbData = nullptr; if (store_prob) - p_prob_item = outputs[1]->buffer(); - - auto dims0 = inputs[0]->getTensorDesc().getDims(); - auto img_info_dims = inputs[2]->getTensorDesc().getDims(); - if (img_info_dims.size() != 2) - IE_THROW() << "Size of im_info tensor for Proposal is incorrect! Size of im_info must be 2. " - << "Now im_info size is " << img_info_dims.size() << "."; - - if (img_info_dims[1] != 3 && img_info_dims[1] != 4) - IE_THROW() << "Shape of im_info tensor for Proposal is incorrect! " - << "Shape of im_info must be of [1, 3] or [1, 4]! " - << "Now shape of im_info is" << img_info_dims[0] << ", " << img_info_dims[1] << "]."; - - size_t img_info_size = img_info_dims[1]; + outProbData = outputs[PROBABILITIES_OUT_IDX]->buffer().as() + + outputs[PROBABILITIES_OUT_IDX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + auto inProbDims = inputs[0]->getTensorDesc().getDims(); + const size_t imgInfoSize = inputs[2]->getTensorDesc().getDims()[0]; // input image height & width - const float img_H = p_img_info_cpu[0]; - const float img_W = p_img_info_cpu[1]; - if (!std::isnormal(img_H) || !std::isnormal(img_W) || (img_H < 0.f) || (img_W < 0.f)) { + const float imgHeight = imgInfoData[0]; + const float imgWidth = imgInfoData[1]; + if (!std::isnormal(imgHeight) || !std::isnormal(imgWidth) || (imgHeight < 0.f) || (imgWidth < 0.f)) { IE_THROW() << "Proposal operation image info input must have positive image height and width."; } // scale factor for height & width - const float scale_H = p_img_info_cpu[2]; - const float scale_W = img_info_size == 4 ? p_img_info_cpu[3] : scale_H; - if (!std::isfinite(scale_H) || !std::isfinite(scale_W) || (scale_H < 0.f) || (scale_W < 0.f)) { + const float scaleHeight = imgInfoData[2]; + const float scaleWidth = imgInfoSize == 4 ? 
imgInfoData[3] : scaleHeight; + if (!std::isfinite(scaleHeight) || !std::isfinite(scaleWidth) || (scaleHeight < 0.f) || (scaleWidth < 0.f)) { IE_THROW() << "Proposal operation image info input must have non negative scales."; } - XARCH::proposal_exec(p_bottom_item, p_d_anchor_item, dims0, - {img_H, img_W, scale_H, scale_W}, anchors.data(), roi_indices.data(), p_roi_item, p_prob_item, conf); + XARCH::proposal_exec(probabilitiesData, anchorsData, inProbDims, + {imgHeight, imgWidth, scaleHeight, scaleWidth}, anchors.data(), roi_indices.data(), outRoiData, outProbData, conf); return OK; } catch (const InferenceEngine::Exception& e) { @@ -189,6 +208,12 @@ public: } private: + const size_t PROBABILITIES_IN_IDX = 0lu; + const size_t ANCHORS_IN_IDX = 1lu; + const size_t IMG_INFO_IN_IDX = 2lu; + const size_t ROI_OUT_IDX = 0lu; + const size_t PROBABILITIES_OUT_IDX = 1lu; + proposal_conf conf; std::vector anchors; std::vector roi_indices; diff --git a/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp index 878797e6d17..c36f47f68c6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp @@ -15,6 +15,8 @@ #include #endif #include "ie_parallel.hpp" +#include +#include "common/tensor_desc_creator.h" namespace { @@ -39,6 +41,8 @@ namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + static void refine_anchors(const float* deltas, const float* scores, const float* anchors, float* proposals, const int anchors_num, const int bottom_H, @@ -272,7 +276,7 @@ void fill_output_blobs(const float* proposals, const int* roi_indices, } -class ONNXCustomProposalImpl : public ExtLayerBase { +class ExperimentalDetectronGenerateProposalsSingleImageImpl : public ExtLayerBase { private: const int INPUT_IM_INFO {0}; const int INPUT_ANCHORS {1}; @@ -282,25 +286,44 @@ private: const int OUTPUT_SCORES {1}; public: - explicit ONNXCustomProposalImpl(const CNNLayer *layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 4 || layer->outData.size() != 2) - IE_THROW() << "Incorrect number of input/output edges!"; + auto proposalOp = ngraph::as_type_ptr(op); + if (!proposalOp) { + errorMessage = "Node is not an instance of the Proposal from the operations set v0."; + return false; + } + } catch (...) 
{ + return false; + } + return true; + } - min_size_ = layer->GetParamAsFloat("min_size"); - nms_thresh_ = layer->GetParamAsFloat("nms_threshold"); - pre_nms_topn_ = layer->GetParamAsInt("pre_nms_count"); - post_nms_topn_ = layer->GetParamAsInt("post_nms_count"); + explicit ExperimentalDetectronGenerateProposalsSingleImageImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + auto proposalOp = ngraph::as_type_ptr(op); + auto proposalAttrs = proposalOp->get_attrs(); + + min_size_ = proposalAttrs.min_size; + nms_thresh_ = proposalAttrs.nms_threshold; + pre_nms_topn_ = proposalAttrs.pre_nms_count; + post_nms_topn_ = proposalAttrs.post_nms_count; coordinates_offset = 0.0f; roi_indices_.resize(post_nms_topn_); - addConfig(layer, - {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32)}, - {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32)}); + addConfig(op, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}, {TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } @@ -420,7 +443,7 @@ private: std::vector roi_indices_; }; -REG_FACTORY_FOR(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage); +REG_FACTORY_FOR(ExperimentalDetectronGenerateProposalsSingleImageImpl, ExperimentalDetectronGenerateProposalsSingleImage); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/psroi.cpp b/inference-engine/src/mkldnn_plugin/nodes/psroi.cpp deleted file mode 100644 index b3e87a3979f..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/psroi.cpp +++ /dev/null @@ -1,523 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" -#include -#include -#include -#include -#include "ie_parallel.hpp" -#include "utils/bfloat16.hpp" -#include - -using namespace MKLDNNPlugin; - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class PSROIPoolingImpl: public ExtLayerBase { -public: - explicit PSROIPoolingImpl(const CNNLayer* layer) { - try { - mode = layer->GetParamAsString("mode", "average"); - if (mode != "bilinear_deformable") - if (layer->insData.size() != 2 || layer->outData.size() != 1) - IE_THROW() << "Incorrect number of input/output edges!"; - // LayerSetUp - outputDim = static_cast(layer->GetParamAsInt("output_dim")); - groupSize = static_cast(layer->GetParamAsInt("group_size")); - spatialScale = layer->GetParamAsFloat("spatial_scale"); - pooledHeight = static_cast(layer->GetParamAsInt("pooled_height", static_cast(groupSize))); - pooledWidth = static_cast(layer->GetParamAsInt("pooled_width", static_cast(groupSize))); - spatialBinsX = static_cast(layer->GetParamAsInt("spatial_bins_x", 1)); - spatialBinsY = static_cast(layer->GetParamAsInt("spatial_bins_y", 1)); - - SizeVector inDims = layer->insData[0].lock()->getTensorDesc().getDims(); - channels = static_cast(inDims[1]); - height = static_cast(inDims[2]); - width = static_cast(inDims[3]); - - SizeVector outDims = 
layer->outData[0]->getTensorDesc().getDims(); - nn = static_cast(outDims[0]); - nc = static_cast(outDims[1]); - nh = static_cast(outDims[2]); - nw = static_cast(outDims[3]); - - // for Deformable PSROIPolling - noTrans = layer->GetParamAsBool("no_trans", true); - partSize = layer->GetParamAsInt("part_size", 1); - transStd = layer->GetParamAsFloat("trans_std", 1); - - auto supportedPrecision = (layer->insData[0].lock()->getTensorDesc().getPrecision() == Precision::BF16 ? Precision::BF16 : Precision::FP32); - - std::vector > plainConfs{ - {NCHW, NCHW}, - {NHWC, NHWC} - }; - - std::vector > blockConfs { - {ConfLayout::BLK16, ConfLayout::BLK16}, - {ConfLayout::BLK8, ConfLayout::BLK8} - }; - - if (mode != "bilinear_deformable") { - for (auto conf : plainConfs) { - LayerConfig config; - DataConfig inConfig0, inConfig1, inConfig2; - SizeVector propDims = layer->insData[1].lock()->getTensorDesc().getDims(); - inConfig0.desc = TensorDesc(supportedPrecision, inDims, conf.first); - inConfig1.desc = TensorDesc(Precision::FP32, propDims, NC); - config.inConfs.push_back(inConfig0); - config.inConfs.push_back(inConfig1); - DataConfig outConfig; - outConfig.desc = TensorDesc(supportedPrecision, outDims, conf.second); - config.outConfs.push_back(outConfig); - confs.push_back(config); - } - for (auto conf : blockConfs) { - addConfig(layer, {DataConfigurator(conf.first, supportedPrecision), - DataConfigurator(ConfLayout::PLN, Precision::FP32)}, - {DataConfigurator(conf.second, supportedPrecision)}); - } - } else if (noTrans) { - addConfig(layer, {DataConfigurator(ConfLayout::PLN, supportedPrecision), DataConfigurator(ConfLayout::PLN, Precision::FP32)}, - {DataConfigurator(ConfLayout::PLN, supportedPrecision)}); - } else { - addConfig(layer, {DataConfigurator(ConfLayout::PLN, supportedPrecision), - DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN, supportedPrecision)}); - } - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - struct PSROIPoolingContext { - PSROIPoolingImpl &node; - std::vector& inputs; - std::vector& outputs; - }; - - template - struct PSROIPoolingExecute { - using srcT = typename std::tuple_element<0, T>::type; - using dstT = typename std::tuple_element<1, T>::type; - - void operator()(PSROIPoolingContext & ctx) { - ctx.node.executeSpecified(ctx.inputs, ctx.outputs); - } - }; - - static void unpackParams(const TensorDesc& srcDesc, const TensorDesc& dstDesc, - int& hInputStride, int& wInputStride, - int& hOutputStride, int& wOutputStride, - Layout& inFmt, Layout& outFmt, - int& inBlockSize, int& outBlockSize, - int& outBlockCount, - unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding) { - inFmt = srcDesc.getLayout(); - outFmt = dstDesc.getLayout(); - int expectedInBlockDimsSize = (inFmt == Layout::BLOCKED ? 5 : 4); - int expectedOutBlockDimsSize = (outFmt == Layout::BLOCKED ? 5 : 4); - auto inBlkDims = srcDesc.getBlockingDesc().getBlockDims(); - auto outBlkDims = dstDesc.getBlockingDesc().getBlockDims(); - if (inBlkDims.size() != expectedInBlockDimsSize) - IE_THROW() << "Unexpected size of blocking dims in input (given " << inBlkDims.size() << ", expected " << expectedInBlockDimsSize << ")"; - if (outBlkDims.size() != expectedOutBlockDimsSize) - IE_THROW() << "Unexpected size of blocking dims in output (given " << outBlkDims.size() << ", expected " << expectedOutBlockDimsSize << ")"; - - inBlockSize = (inFmt == Layout::BLOCKED ? 
srcDesc.getBlockingDesc().getBlockDims()[4] : 1); - outBlockSize = (outFmt == Layout::BLOCKED ? dstDesc.getBlockingDesc().getBlockDims()[4] : 1); - inputChannelsPadding = srcDesc.getBlockingDesc().getBlockDims()[1] * inBlockSize; - outputChannelsPadding = dstDesc.getBlockingDesc().getBlockDims()[1] * outBlockSize; - outBlockCount = outputChannelsPadding / outBlockSize; - - int hOutStrIndex = 0, wOutStrIndex = 0, hInStrIndex = 0, wInStrIndex = 0; - const auto& outOrder = dstDesc.getBlockingDesc().getOrder(); - const auto& inOrder = srcDesc.getBlockingDesc().getOrder(); - for (int i = 0; i < outOrder.size(); i++) { - if (outOrder[i] == 2) hOutStrIndex = i; - if (outOrder[i] == 3) wOutStrIndex = i; - } - for (int i = 0; i < inOrder.size(); i++) { - if (inOrder[i] == 2) hInStrIndex = i; - if (inOrder[i] == 3) wInStrIndex = i; - } - hInputStride = srcDesc.getBlockingDesc().getStrides()[hInStrIndex]; - wInputStride = srcDesc.getBlockingDesc().getStrides()[wInStrIndex]; - hOutputStride = dstDesc.getBlockingDesc().getStrides()[hOutStrIndex]; - wOutputStride = dstDesc.getBlockingDesc().getStrides()[wOutStrIndex]; - } - - template - void executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, - const int n, const int roiBatchInd, - const TensorDesc& srcDesc, const TensorDesc& dstDesc) { - Layout inFmt, outFmt; - int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; - unsigned long inputChannelsPadding, outputChannelsPadding; - unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, - inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); - const float roiStartW = static_cast(round(bottomRois[1])) * spatialScale; - const float roiStartH = static_cast(round(bottomRois[2])) * spatialScale; - const float roiEndW = static_cast(round(bottomRois[3] + 1.0f)) * spatialScale; - const float roiEndH = static_cast(round(bottomRois[4] + 1.0f)) * spatialScale; - // Force too small ROIs to be 1x1 - const float roiWidth = std::max(roiEndW - roiStartW, 0.1f); // avoid 0 - const float roiHeight = std::max(roiEndH - roiStartH, 0.1f); - - auto avgPsroi = [&] (int c, int h, int w, int binOffIn, int binOffOut, int inBlkRes, int outBlkRes) { - float binSizeH = roiHeight / static_cast(pooledHeight); - float binSizeW = roiWidth / static_cast(pooledWidth); - - int hStart = static_cast(floor(static_cast(h + 0) * binSizeH + roiStartH)); - int hEnd = static_cast(ceil(static_cast(h + 1) * binSizeH + roiStartH)); - - hStart = std::min(std::max(hStart, 0), height); - hEnd = std::min(std::max(hEnd, 0), height); - int wStart = static_cast(floor(static_cast(w + 0) * binSizeW + roiStartW)); - int wEnd = static_cast(ceil(static_cast(w + 1) * binSizeW + roiStartW)); - - wStart = std::min(std::max(wStart, 0), width); - wEnd = std::min(std::max(wEnd, 0), width); - - const float binArea = static_cast((hEnd - hStart) * (wEnd - wStart)); - - size_t dstIndex = binOffOut + h * hOutputStride + w * wOutputStride + outBlkRes; - dstData[dstIndex] = 0; - if (binArea) { - float outSum = 0.0f; - const int heightIndexBound = hEnd * hInputStride; - const int widthIndexBound = wEnd * wInputStride; - for (int hh = hStart * hInputStride; hh < heightIndexBound; hh += hInputStride) { - for (int ww = wStart * wInputStride; ww < widthIndexBound; ww += wInputStride) { - outSum += srcData[binOffIn + hh + ww + inBlkRes]; - } - } - dstData[dstIndex] = outSum / binArea; - } - }; - if (inFmt == 
Layout::NHWC) { - parallel_for2d(nh, nw, [&](int h, int w) { - const int binOffsetOutput = n * nc * nh * nw; - const int binOffsetInput = roiBatchInd * channels * height * width; - for (int c = 0; c < nc; c++) { - const int gc = (c * groupSize + h) * groupSize + w; - avgPsroi(c, h, w, 0, 0, binOffsetInput + gc, binOffsetOutput + c); - } - }); - } else if (inFmt == Layout::NCHW) { - parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { - const int gc = (c * groupSize + h) * groupSize + w; - const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); - const int outputBlockIdx = (c / outBlockSize) * outBlockSize; - const int binOffsetInput = (roiBatchInd * inputChannelsPadding + gc) * height * width; - const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw; - avgPsroi(c, h, w, 0, outputBlockResidual, binOffsetInput, binOffsetOutput); - }); - } else { // nChw16c, nChw8c - parallel_for3d(outBlockCount, nh, nw, [&](int blkIdx, int h, int w) { - int cStart = blkIdx * outBlockSize; - int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize); - for (int c = cStart; c < cEnd; c++) { - const int gc = (c * groupSize + h) * groupSize + w; - const int inputBlockResidual = (inFmt == Layout::NCHW ? 0 : gc % inBlockSize); - const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); - const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; - const int outputBlockIdx = (c / outBlockSize) * outBlockSize; - const int binOffsetInput = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; - const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw; - avgPsroi(c, h, w, inputBlockResidual, outputBlockResidual, binOffsetInput, binOffsetOutput); - } - }); - } - } - - template - void executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, - const int currentRoi, const int roiBatchInd, - const TensorDesc& srcDesc, const TensorDesc& dstDesc) { - Layout inFmt, outFmt; - int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; - unsigned long inputChannelsPadding, outputChannelsPadding; - unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, - inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); - const float roiStartW = bottomRois[1] * spatialScale; - const float roiStartH = bottomRois[2] * spatialScale; - const float roiEndW = bottomRois[3] * spatialScale; - const float roiEndH = bottomRois[4] * spatialScale; - const float roiWidth = roiEndW - roiStartW; - const float roiHeight = roiEndH - roiStartH; - size_t numBins = spatialBinsX * spatialBinsY; - const int binCount = nh * nw; - - auto bilinearPsroi = [&] (int c, int h, int w, int binOffOut, int outBlkRes) { - float accum = 0.0f; - int binOffIn, inBlkRes; - size_t dstIndex = binOffOut + h * hOutputStride + w * wOutputStride + outBlkRes; - dstData[dstIndex] = 0; - - for (size_t binY = 0; binY < spatialBinsY; binY++) { - const float boxYmin = roiStartH + (binY + 0) * (roiHeight / spatialBinsY); - const float boxYmax = roiStartH + (binY + 1) * (roiHeight / spatialBinsY); - const float heightScale = nh > 1 ? (boxYmax - boxYmin) * (height - 1) / (pooledHeight - 1) : 0.0f; - const float inY = nh > 1 ? 
(h * heightScale + boxYmin * (height - 1)) : 0.5f * (boxYmin + boxYmax) * (height - 1); - for (size_t binX = 0; binX < spatialBinsX; binX++) { - size_t gc = c + (binY * spatialBinsX + binX) * nc; - if (inFmt == Layout::NHWC) { - binOffIn = roiBatchInd * channels * height * width + gc; - inBlkRes = 0; - } else { // nchw, nChw16c, nChw8c - const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; - binOffIn = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; - inBlkRes = (inFmt == Layout::BLOCKED ? gc % inBlockSize : 0); - } - const auto *bottomData = srcData + binOffIn; - - const float boxXmin = roiStartW + (binX + 0) * (roiWidth / spatialBinsX); - const float boxXmax = roiStartW + (binX + 1) * (roiWidth / spatialBinsX); - - const float widthScale = nw > 1 ? (boxXmax - boxXmin) * (width - 1) / (pooledWidth - 1) : 0.0f; - const float inX = nw > 1 ? (w * widthScale + boxXmin * (width - 1)) : 0.5f * (boxXmin + boxXmax) * (width - 1); - - if (!(inY < 0 || inY > height - 1 || inX < 0 || inX > width - 1)) { - const int topYIndex = static_cast(floorf(inY)); - int bottomYIndex = static_cast(ceilf(inY)); - const int leftXIndex = static_cast(floorf(inX)); - int rightXIndex = static_cast(ceilf(inX)); - - if (rightXIndex > width - 1) rightXIndex = width - 1; - if (bottomYIndex > height - 1) bottomYIndex = height - 1; - - auto topLeftIndex = topYIndex * hInputStride + leftXIndex * wInputStride + inBlkRes; - auto topRightIndex = topYIndex * hInputStride + rightXIndex * wInputStride + inBlkRes; - auto bottomLeftIndex = bottomYIndex * hInputStride + leftXIndex * wInputStride + inBlkRes; - auto bottomRightIndex = bottomYIndex * hInputStride + rightXIndex * wInputStride + inBlkRes; - - const float topLeft = bottomData[topLeftIndex]; - const float topRight = bottomData[topRightIndex]; - const float bottomLeft = bottomData[bottomLeftIndex]; - const float bottomRight = bottomData[bottomRightIndex]; - - const float top = topLeft + (topRight - topLeft) * (inX - leftXIndex); - const float bottom = bottomLeft + (bottomRight - bottomLeft) * (inX - leftXIndex); - - accum += top + (bottom - top) * (inY - topYIndex); - } - } - } - accum /= numBins; - dstData[dstIndex] = accum; - }; - - if (inFmt == Layout::NHWC) { - const int binOffsetOutput = currentRoi * nc * nh * nw; - parallel_for2d(nh, nw, [&](int h, int w) { - for (int c = 0; c < nc; c++) { - bilinearPsroi(c, h, w, 0, binOffsetOutput + c); - } - }); - } else if (inFmt == Layout::NCHW) { - parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { - bilinearPsroi(c, h, w, 0, (currentRoi * outputChannelsPadding + c) * binCount); - }); - } else { // nChw16c, nChw8c - parallel_for3d(outBlockCount, nh, nw, [&](int blkIdx, int h, int w) { - int cStart = blkIdx * outBlockSize; - int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize); - for (int c = cStart; c < cEnd; c++) { - const int outputBlockIdx = (c / inBlockSize) * inBlockSize; - const int binOffsetOutput = (currentRoi * outputChannelsPadding + outputBlockIdx) * binCount; - const int outputBlockResidual = (inFmt == Layout::BLOCKED ? 
c % inBlockSize : 0); - bilinearPsroi(c, h, w, outputBlockResidual, binOffsetOutput); - } - }); - } - } - - template - void executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, - const float *bottomTrans, const int numClasses, const int channelsEachClass, - const int currentRoi, const int roiBatchInd) { - const float roiStartW = static_cast(round(bottomRois[1])) * spatialScale - 0.5f; - const float roiStartH = static_cast(round(bottomRois[2])) * spatialScale - 0.5f; - const float roiEndW = static_cast(round(bottomRois[3]) + 1.0f) * spatialScale - 0.5f; - const float roiEndH = static_cast(round(bottomRois[4]) + 1.0f) * spatialScale - 0.5f; - // Force too small ROIs to be 1x1 - const float roiWidth = std::max(roiEndW - roiStartW, 0.1f); // avoid 0 - const float roiHeight = std::max(roiEndH - roiStartH, 0.1f); - parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { - size_t dstIndex = ((currentRoi * nc + c) * nh + h) * nw + w; - dstData[dstIndex] = 0; - // Compute w and h at bottom - float binSizeH = roiHeight / static_cast(pooledHeight); - float binSizeW = roiWidth / static_cast(pooledWidth); - - float subBinSizeH = binSizeH / static_cast(spatialBinsX); - float subBinSizeW = binSizeW / static_cast(spatialBinsY); - - int partH = h * partSize / pooledHeight; - int partW = w * partSize / pooledWidth; - int classId = c / channelsEachClass; - float transX = noTrans ? 0 : - bottomTrans[(((currentRoi * numClasses + classId) * 2) * partSize + partH) - * partSize + partW] * transStd; - float transY = noTrans ? 0 : - bottomTrans[(((currentRoi * numClasses + classId) * 2 + 1) * partSize + partH) - * partSize + partW] * transStd; - - float wStart = w * binSizeW + roiStartW + transX * roiWidth; - float hStart = h * binSizeH + roiStartH + transY * roiHeight; - - float sum = 0; - int count = 0; - int gw = w * groupSize / pooledWidth; - int gh = h * groupSize / pooledHeight; - gw = (std::min)((std::max)(gw, 0), static_cast(groupSize - 1)); - gh = (std::min)((std::max)(gh, 0), static_cast(groupSize - 1)); - - const inputType* offsetBottomData = srcData + (roiBatchInd * channels) * height * width; - for (size_t ih = 0; ih < spatialBinsY; ih++) { - for (size_t iw = 0; iw < spatialBinsX; iw++) { - float w1 = wStart + iw * subBinSizeW; - float h1 = hStart + ih * subBinSizeH; - // bilinear interpolation - if (w1 < -0.5 || w1 > width - 0.5 || h1 < -0.5 || h1 > height - 0.5) - continue; - w1 = static_cast((std::min)((std::max)(static_cast(w1), 0.0), width - 1.0)); - h1 = static_cast((std::min)((std::max)(static_cast(h1), 0.0), height - 1.0)); - int c1 = static_cast((c * groupSize + gh) * groupSize + gw); - float val = bilinearInterp(offsetBottomData + - c1 * height * width, w1, h1, width); - - sum += val; - count++; - } - } - dstData[dstIndex] = count == 0 ? 
0 : sum / count; - }); - } - - template - void executeSpecified(std::vector& inputs, std::vector& outputs) { - const auto *srcData = inputs[0]->cbuffer().as() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const float *bottomRoisBeginning = inputs[1]->cbuffer().as() + inputs[1]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - auto *dstData = outputs[0]->buffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - auto srcDesc = inputs[0]->getTensorDesc(); - auto dstDesc = outputs[0]->getTensorDesc(); - - int realRois = 0; - for (; realRois < nn; realRois++) { - int roiBatchInd = static_cast(bottomRoisBeginning[realRois * 5]); - if (roiBatchInd == -1) { - break; - } - } - - // for Deformable PSROIPooling - float *bottomTrans = nullptr; - int numClasses = 1; - int channelsEachClass = outputDim; - if (!noTrans) { - bottomTrans = inputs[2]->cbuffer().as() + inputs[2]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - numClasses = static_cast(inputs[2]->getTensorDesc().getDims()[1]) / 2; - channelsEachClass /= numClasses; - } - - parallel_for(realRois, [&](int currentRoi) { - const float *bottomRois = bottomRoisBeginning + currentRoi * 5; - int roiBatchInd = static_cast(bottomRois[0]); - if (mode == "average") { - executeAverage(srcData, dstData, bottomRois, currentRoi, roiBatchInd, srcDesc, dstDesc); - } else if (mode == "bilinear") { - executeBilinear(srcData, dstData, bottomRois, currentRoi, roiBatchInd, srcDesc, dstDesc); - } else if (mode == "bilinear_deformable") { - executeBilinearDeformable(srcData, dstData, bottomRois, bottomTrans, - numClasses, channelsEachClass, currentRoi, roiBatchInd); - } - }); - - memset(dstData + realRois * nc * nh * nw, 0, (nn - realRois) * nc * nh * nw * sizeof(outputType)); - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - try { - auto inputPrec = inputs[0]->getTensorDesc().getPrecision(); - auto outputPrec = outputs[0]->getTensorDesc().getPrecision(); - - if (!((inputPrec == Precision::BF16 && outputPrec == Precision::BF16) || - (inputPrec == Precision::FP32 && outputPrec == Precision::FP32))) - return NOT_IMPLEMENTED; - - PSROIPoolingContext ctx = { - *this, - inputs, - outputs - }; - - OV_SWITCH(MKLDNNPlugin, PSROIPoolingExecute, ctx, std::tie(inputPrec, outputPrec), - OV_CASE2(Precision::FP32, Precision::FP32, float, float), - OV_CASE2(Precision::BF16, Precision::BF16, bfloat16_t, bfloat16_t)) - - return OK; - } - catch (const std::exception& excp) { - snprintf(resp->msg, sizeof(resp->msg), "%s", excp.what()); - return GENERAL_ERROR; - } - catch(...) 
{ - return GENERAL_ERROR; - } - } - - template - inline float bilinearInterp(const inputType* data, const float x, const float y, const int width_) { - int x1 = static_cast(std::floor(x)); - int x2 = static_cast(std::ceil(x)); - int y1 = static_cast(std::floor(y)); - int y2 = static_cast(std::ceil(y)); - float distX = x - x1; - float distY = y - y1; - - float value11 = data[y1 * width_ + x1]; - float value12 = data[y2 * width_ + x1]; - float value21 = data[y1 * width_ + x2]; - float value22 = data[y2 * width_ + x2]; - float value = (1 - distX) * (1 - distY) * value11 + (1 - distX) * distY * value12 - + distX * (1 - distY) * value21 + distX * distY * value22; - return value; - } - -private: - size_t outputDim = 0; - size_t groupSize = 0; - float spatialScale = 0; - size_t pooledHeight = 0; - size_t pooledWidth = 0; - size_t spatialBinsX = 0; - size_t spatialBinsY = 0; - std::string mode = ""; - - int channels = 0; - int height = 0; - int width = 0; - - int nn = 0; - int nc = 0; - int nh = 0; - int nw = 0; - - // for Deformable PSROIPolling - bool noTrans; - int partSize; - float transStd; -}; - -REG_FACTORY_FOR(PSROIPoolingImpl, PSROIPooling); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/range.cpp b/inference-engine/src/mkldnn_plugin/nodes/range.cpp index fc0295f2325..30de35f6c72 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/range.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/range.cpp @@ -9,50 +9,84 @@ #include #include #include "ie_parallel.hpp" +#include +#include + +using namespace MKLDNNPlugin; namespace InferenceEngine { namespace Extensions { namespace Cpu { + + class RangeImpl: public ExtLayerBase { -public: - explicit RangeImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; + if (!MKLDNNPlugin::one_of(op->get_type_info(), ngraph::op::v0::Range::type_info, ngraph::op::v4::Range::type_info)) { + errorMessage = "Only opset1 and opset4 Range operation is supported"; + return false; + } + if (std::dynamic_pointer_cast(op->get_input_node_shared_ptr(RANGE_START)) == nullptr || + std::dynamic_pointer_cast(op->get_input_node_shared_ptr(RANGE_LIMIT)) == nullptr || + std::dynamic_pointer_cast(op->get_input_node_shared_ptr(RANGE_DELTA)) == nullptr) { + errorMessage = "Only const inputs for Range operation is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; + } - if (layer->insData.size() != 3) - IE_THROW() << layer->name << " Incorrect number of input edges!"; + std::string errorPrefix; - SizeVector start_dims = layer->insData[RANGE_START].lock()->getTensorDesc().getDims(); - if (start_dims.size() > 1) - IE_THROW() << layer->name << " Start scalar should have 1 dimension"; +public: + explicit RangeImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - SizeVector limit_dims = layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getDims(); - if (limit_dims.size() > 1) - IE_THROW() << layer->name << " Limit scalar should have 1 dimension"; + errorPrefix = "Range layer with name '" + op->get_friendly_name() + "'"; - SizeVector delta_dims = layer->insData[RANGE_DELTA].lock()->getTensorDesc().getDims(); - if (delta_dims.size() > 1) - IE_THROW() << layer->name << " Delta scalar should have 1 dimension"; + if (op->get_input_size() != 3 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; - SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims(); + SizeVector start_dims = op->get_input_shape(RANGE_START); + if (ngraph::shape_size(start_dims) != 1) + IE_THROW() << errorPrefix << " has start scalar with more than 1 value"; + + SizeVector limit_dims = op->get_input_shape(RANGE_LIMIT); + if (ngraph::shape_size(limit_dims) != 1) + IE_THROW() << errorPrefix << " has limit scalar with more than 1 value"; + + SizeVector delta_dims = op->get_input_shape(RANGE_DELTA); + if (ngraph::shape_size(delta_dims) != 1) + IE_THROW() << errorPrefix << " has delta scalar with more than 1 value"; + + SizeVector dst_dims = op->get_output_shape(0); if (dst_dims.size() > 1) - IE_THROW() << layer->name << " Output vector should have 1 dimension"; + IE_THROW() << errorPrefix << " has unsupported rank for output: " << dst_dims.size(); - if (!(layer->insData[RANGE_START].lock()->getTensorDesc().getPrecision() == Precision::I32 && - layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getPrecision() == Precision::I32 && - layer->insData[RANGE_DELTA].lock()->getTensorDesc().getPrecision() == Precision::I32 && - layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) && - !(layer->insData[RANGE_START].lock()->getTensorDesc().getPrecision() == Precision::FP32 && - layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getPrecision() == Precision::FP32 && - layer->insData[RANGE_DELTA].lock()->getTensorDesc().getPrecision() == Precision::FP32 && - layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) { - addConfig(layer, { DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32) }, { DataConfigurator(ConfLayout::PLN, Precision::FP32) }); + if (!(details::convertPrecision(op->get_input_element_type(RANGE_START)) == Precision::I32 && + details::convertPrecision(op->get_input_element_type(RANGE_LIMIT)) == Precision::I32 && + details::convertPrecision(op->get_input_element_type(RANGE_DELTA)) == Precision::I32 && + details::convertPrecision(op->get_output_element_type(0)) == Precision::I32) && + !(details::convertPrecision(op->get_input_element_type(RANGE_START)) == Precision::FP32 && + details::convertPrecision(op->get_input_element_type(RANGE_LIMIT)) == Precision::FP32 && + details::convertPrecision(op->get_input_element_type(RANGE_DELTA)) == 
Precision::FP32 && + details::convertPrecision(op->get_output_element_type(0)) == Precision::FP32)) { + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); } else { - addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, - { DataConfigurator(ConfLayout::PLN) }); + addConfig(op, {{TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}, + {TensorDescCreatorTypes::ncsp}}, + {{TensorDescCreatorTypes::ncsp}}); } } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); @@ -95,9 +129,9 @@ public: } private: - const size_t RANGE_START = 0; - const size_t RANGE_LIMIT = 1; - const size_t RANGE_DELTA = 2; + static const size_t RANGE_START = 0; + static const size_t RANGE_LIMIT = 1; + static const size_t RANGE_DELTA = 2; template StatusCode range(data_t start, data_t limit, data_t delta, Blob::Ptr output); diff --git a/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp b/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp deleted file mode 100644 index 3a6514cabc3..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp +++ /dev/null @@ -1,446 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" -#include "utils/general_utils.h" -#include "common/defs.h" -#include "common/softmax.h" -#include "common/cpu_convert.h" -#include -#include -#include -#include -#include -#include "utils/bfloat16.hpp" -#include "emitters/jit_bf16_emitters.hpp" -#include "mkldnn.hpp" -#include -#include - -using namespace mkldnn; -using namespace MKLDNNPlugin; -using namespace InferenceEngine; -using namespace mkldnn::impl::cpu; -using namespace mkldnn::impl::cpu::x64; -using namespace mkldnn::impl::utils; - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -#define GET_OFF(field) offsetof(jit_args_logistic, field) - -struct jit_args_logistic { - const void* src; - void* dst; - size_t work_amount; -}; - -struct jit_logistic_config_params { - InferenceEngine::Precision src_dt; - InferenceEngine::Precision dst_dt; - unsigned src_data_size = 0; - unsigned dst_data_size = 0; -}; - -struct jit_uni_logistic_kernel { - void (*ker_)(const jit_args_logistic *); - - void operator()(const jit_args_logistic *args) { assert(ker_); ker_(args); } - - virtual void create_ker() = 0; - - jit_uni_logistic_kernel() : ker_(nullptr) {} - virtual ~jit_uni_logistic_kernel() {} -}; - -template -struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_generator { - DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_logistic_kernel_f32) - - jit_uni_logistic_kernel_f32(jit_logistic_config_params jcp) : jcp_(jcp), jit_uni_logistic_kernel(), jit_generator() {} - - void create_ker() override { - jit_generator::create_kernel(); - ker_ = (decltype(ker_))jit_ker(); - } - - void generate() override { - exp_injector.reset(new jit_uni_eltwise_injector_f32(this, mkldnn::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f)); - - if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) - emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa, nullptr)); - - this->preamble(); - - mov(reg_src, ptr[reg_params + GET_OFF(src)]); - mov(reg_dst, ptr[reg_params + GET_OFF(dst)]); - mov(reg_work_amount, ptr[reg_params + GET_OFF(work_amount)]); - mov(reg_table, l_table); - - Xbyak::Label 
main_loop_label; - Xbyak::Label tail_loop_label; - Xbyak::Label exit_label; - - int step = vlen / sizeof(float); - L(main_loop_label); { - cmp(reg_work_amount, step); - jl(tail_loop_label, T_NEAR); - - load_vector(vmm_src, ptr[reg_src], jcp_.src_dt); - compute_kernel(); - store_vector(ptr[reg_dst], vmm_src, jcp_.dst_dt); - - add(reg_src, step * jcp_.src_data_size); - add(reg_dst, step * jcp_.dst_data_size); - sub(reg_work_amount, step); - - jmp(main_loop_label, T_NEAR); - } - - step = 1; - L(tail_loop_label); { - cmp(reg_work_amount, step); - jl(exit_label, T_NEAR); - - load_scalar(xmm_src, ptr[reg_src], jcp_.src_dt); - compute_kernel(); - store_scalar(ptr[reg_dst], xmm_src, jcp_.dst_dt); - - add(reg_src, step * jcp_.src_data_size); - add(reg_dst, step * jcp_.dst_data_size); - sub(reg_work_amount, step); - - jmp(tail_loop_label, T_NEAR); - } - - L(exit_label); - - this->postamble(); - - if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) - emu_vcvtneps2bf16->emit_data(); - - exp_injector->prepare_table(); - - prepare_table(); - } - -private: - using Vmm = typename conditional3::type; - size_t vlen = cpu_isa_traits::vlen; - - Xbyak::Address table_val(int index) { return ptr[reg_table + index * vlen]; } - - Xbyak::Reg64 reg_src = r8; - Xbyak::Reg64 reg_dst = r9; - Xbyak::Reg64 reg_table = r10; - Xbyak::Reg64 reg_work_amount = r11; - Xbyak::Reg64 reg_params = abi_param1; - - Vmm vmm_aux0 = Vmm(0); - Vmm vmm_src = Vmm(1); - Xbyak::Xmm xmm_src = Xbyak::Xmm(1); - Vmm vmm_aux1 = Vmm(2); - Vmm vmm_aux2 = Vmm(3); - - const Xbyak::Opmask k_mask = Xbyak::Opmask(1); - - std::unique_ptr emu_vcvtneps2bf16; - - Xbyak::Label l_table; - - std::shared_ptr> exp_injector; - - jit_logistic_config_params jcp_; - - void compute_kernel() { - uni_vmovups(vmm_aux0, vmm_src); - uni_vandps(vmm_aux0, vmm_aux0, table_val(0)); - uni_vorps(vmm_src, vmm_src, table_val(0)); - - exp_injector->compute_vector_range(vmm_src.getIdx(), vmm_src.getIdx() + 1); - - uni_vmovups(vmm_aux1, vmm_src); - uni_vaddps(vmm_aux1, vmm_aux1, table_val(1)); - uni_vdivps(vmm_src, vmm_src, vmm_aux1); - - uni_vmovups(vmm_aux2, table_val(1)); - uni_vsubps(vmm_aux2, vmm_aux2, vmm_src); - - if (isa == x64::sse41) { - uni_vblendvps(vmm_aux2, vmm_aux2, vmm_src, vmm_aux0); - uni_vmovups(vmm_src, vmm_aux2); - } else if (isa == x64::avx2) { - uni_vblendvps(vmm_src, vmm_aux2, vmm_src, vmm_aux0); - } else { - vptestmd(k_mask, vmm_aux0, vmm_aux0); - vblendmps(vmm_src | k_mask, vmm_aux2, vmm_src); - } - } - - void prepare_table() { - auto broadcast_int = [&](int val) { - for (size_t d = 0; d < vlen / sizeof(float); ++d) { - dd(val); - } - }; - - align(64); - L(l_table); - - broadcast_int(vals_for_logistic_activate.mask_sign); - broadcast_int(vals_for_logistic_activate.float_1); - } - - const struct vals_for_logistic_activate_type { - int mask_sign = 0x80000000; // 0 // mask to extract sign - int float_1 = 0x3f800000; // 1 // 1.0f - } vals_for_logistic_activate; - - inline void load_vector(Vmm vmm_src, const Xbyak::Address &op, InferenceEngine::Precision src_dt) { - switch (src_dt) { - case InferenceEngine::Precision::FP32: - uni_vmovups(vmm_src, op); - break; - case InferenceEngine::Precision::BF16: - vpmovzxwd(vmm_src, op); - uni_vpslld(vmm_src, vmm_src, 16); - break; - default: - assert(!"unknown src_dt"); - } - } - inline void store_vector(const Xbyak::Address &op, Vmm vmm_dst, InferenceEngine::Precision dst_dt) { - Xbyak::Ymm ymm_dst = Xbyak::Ymm(vmm_dst.getIdx()); - - switch (dst_dt) { - case InferenceEngine::Precision::FP32: - uni_vmovups(op, 
vmm_dst); - break; - case InferenceEngine::Precision::BF16: - if (mayiuse(avx512_core_bf16)) - vcvtneps2bf16(ymm_dst, vmm_dst); - else - emu_vcvtneps2bf16->emit_code({static_cast(vmm_dst.getIdx())}, {static_cast(ymm_dst.getIdx())}); - vmovdqu16(op, ymm_dst); - break; - default: - assert(!"unknown dst_dt"); - } - } - inline void load_scalar(Xbyak::Xmm xmm_src, const Xbyak::Address &op, InferenceEngine::Precision src_dt) { - switch (src_dt) { - case InferenceEngine::Precision::FP32: - movss(xmm_src, op); - break; - case InferenceEngine::Precision::BF16: - pinsrw(xmm_src, op, 0x0); - uni_vpslld(xmm_src, xmm_src, 16); - break; - default: - assert(!"unknown src_dt"); - } - } - inline void store_scalar(const Xbyak::Address &op, Xbyak::Xmm xmm_dst, InferenceEngine::Precision dst_dt) { - switch (dst_dt) { - case InferenceEngine::Precision::FP32: - movss(op, xmm_dst); - break; - case InferenceEngine::Precision::BF16: - uni_vpsrld(xmm_dst, xmm_dst, 16); - pextrw(op, xmm_dst, 0x0); - break; - default: - assert(!"unknown dst_dt"); - } - } -}; - -class RegionYoloImpl: public ExtLayerBase { -public: - explicit RegionYoloImpl(const CNNLayer* layer) { - try { - if (layer->insData.size() != 1 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; - - input_prec = layer->insData.front().lock()->getPrecision(); - output_prec = layer->outData.front()->getPrecision(); - - if (input_prec != Precision::FP32 && input_prec != Precision::BF16) { - input_prec = Precision::FP32; - } - - if (output_prec != Precision::FP32 && output_prec != Precision::BF16) { - output_prec = Precision::FP32; - } - - if (Precision::BF16 == output_prec) { - if (!mayiuse(avx512_core)) { - output_prec = Precision::FP32; - } - } - - classes = layer->GetParamAsInt("classes"); - coords = layer->GetParamAsInt("coords"); - num = layer->GetParamAsInt("num"); - do_softmax = layer->GetParamAsBool("do_softmax", true); - mask = layer->GetParamAsInts("mask", {}); - - jit_logistic_config_params jcp; - jcp.src_dt = jcp.dst_dt = output_prec; - jcp.src_data_size = jcp.dst_data_size = output_prec.size(); - - block_size = 1; - if (mayiuse(x64::avx512_common)) { - logistic_kernel.reset(new jit_uni_logistic_kernel_f32(jcp)); - block_size = 16; - } else if (mayiuse(x64::avx2)) { - logistic_kernel.reset(new jit_uni_logistic_kernel_f32(jcp)); - block_size = 8; - } else if (mayiuse(x64::sse41)) { - logistic_kernel.reset(new jit_uni_logistic_kernel_f32(jcp)); - block_size = 4; - } - - softmax_kernel = std::make_shared(input_prec, output_prec); - - if (logistic_kernel) - logistic_kernel->create_ker(); - - addConfig(layer, {DataConfigurator(ConfLayout::PLN, input_prec)}, {DataConfigurator(ConfLayout::PLN, output_prec)}); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - size_t mask_size = mask.size(); - - size_t IW = (inputs[0]->getTensorDesc().getDims().size() > 3) ? inputs[0]->getTensorDesc().getDims()[3] : 1; - size_t IH = (inputs[0]->getTensorDesc().getDims().size() > 2) ? inputs[0]->getTensorDesc().getDims()[2] : 1; - size_t IC = (inputs[0]->getTensorDesc().getDims().size() > 1) ? inputs[0]->getTensorDesc().getDims()[1] : 1; - size_t B = (inputs[0]->getTensorDesc().getDims().size() > 0) ? 
inputs[0]->getTensorDesc().getDims()[0] : 1; - - int end_index = 0; - int num_ = 0; - if (do_softmax) { - // Region layer (Yolo v2) - end_index = IW * IH; - num_ = num; - } else { - // Yolo layer (Yolo v3) - end_index = IW * IH * (classes + 1); - num_ = mask_size; - } - size_t inputs_size = IH * IW * num_ * (classes + coords + 1); - size_t total_size = 2 * IH * IW; - - const auto *src_data = inputs[0]->cbuffer().as(); - auto *dst_data = outputs[0]->buffer().as(); - - try { - cpu_convert(src_data, dst_data, inputs[0]->getTensorDesc().getPrecision(), outputs[0]->getTensorDesc().getPrecision(), B * IC * IH * IW); - - for (int b = 0; b < B; b++) { - for (int n = 0; n < num_; n++) { - size_t index = b * inputs_size + n * IW * IH * (classes + coords + 1); - calculate_logistic(index, total_size, dst_data); - - index = b * inputs_size + IW * IH * (n * (classes + coords + 1) + coords); - calculate_logistic(index, end_index, dst_data); - } - } - - if (do_softmax) { - int index = IW * IH * (coords + 1); - int batch_offset = inputs_size / num; - for (int b = 0; b < B * num; b++) { - softmax_kernel->execute(src_data + input_prec.size() * (index + b * batch_offset), - dst_data + output_prec.size() * (index + b * batch_offset), 1, classes, IH, IW); - } - } - } - catch (const std::exception& excp) { - snprintf(resp->msg, sizeof(resp->msg), "%s", excp.what()); - return GENERAL_ERROR; - } - catch(...) { - return GENERAL_ERROR; - } - return OK; - } - -private: - int classes; - int coords; - int num; - float do_softmax; - std::vector mask; - Precision input_prec, output_prec; - - int block_size; - std::shared_ptr logistic_kernel; - std::shared_ptr softmax_kernel; - - union U { - float as_float_value; - int as_int_value; - }; - - inline float logistic_scalar(float src) { - U aux2; - aux2.as_float_value = src; - int sign = aux2.as_int_value >> 31; - if (sign == 0) - src *= -1; - - src = std::exp(src); - - src = src / (src + 1); - if (sign == 0) - src = 1 - src; - - return src; - } - - - inline void calculate_logistic(size_t start_index, int count, uint8_t * dst_data) { - auto dst_data_size = output_prec.size(); - if (logistic_kernel) { - int blocks_num = MKLDNNPlugin::div_up(count, block_size); - parallel_for(blocks_num, [&](int ib) { - int idx = ib * block_size; - int work_amount = std::min(count - idx, block_size); - - auto arg = jit_args_logistic(); - arg.src = arg.dst = dst_data + dst_data_size * (start_index + idx); - arg.work_amount = static_cast(work_amount); - - (*logistic_kernel)(&arg); - }); - } else { - if (Precision::FP32 == output_prec) { - auto float_dst_data = reinterpret_cast(dst_data); - for (int i = 0; i < count; i++) { - float_dst_data[i + start_index] = logistic_scalar(float_dst_data[i + start_index]); - } - } else if (Precision::BF16 == output_prec) { - auto bf16_dst_data = reinterpret_cast(dst_data); - for (int i = 0; i < count; i++) { - bf16_dst_data[i + start_index] = logistic_scalar(bf16_dst_data[i + start_index]); - } - } else { - IE_THROW() << "Unsupported precision configuration outPrc=" << output_prec.name(); - } - } - } -}; - -REG_FACTORY_FOR(RegionYoloImpl, RegionYolo); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/reorg_yolo.cpp b/inference-engine/src/mkldnn_plugin/nodes/reorg_yolo.cpp index 7b2a0841ad0..ff705fc63b0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/reorg_yolo.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/reorg_yolo.cpp @@ -4,6 +4,9 @@ #include "base.hpp" #include 
+#include + +using namespace MKLDNNPlugin; namespace InferenceEngine { namespace Extensions { @@ -11,14 +14,38 @@ namespace Cpu { class ReorgYoloImpl: public ExtLayerBase { public: - explicit ReorgYoloImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 1 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; + const auto reorgYolo = std::dynamic_pointer_cast(op); + if (!reorgYolo) { + errorMessage = "Only opset2 ReorgYolo operation is supported"; + return false; + } + } catch (...) { + return false; + } + return true; + } - stride = layer->GetParamAsInt("stride"); + explicit ReorgYoloImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } - addConfig(layer, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}); + errorPrefix = std::string(op->get_type_name()) + " node with name '" + op->get_friendly_name() + "'"; + if (op->get_input_size() != 1 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + const auto reorgYolo = std::dynamic_pointer_cast(op); + const auto strides = reorgYolo->get_strides(); + if (strides.empty()) + IE_THROW() << errorPrefix << " has empty strides"; + stride = strides[0]; + + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); } @@ -61,6 +88,8 @@ public: private: int stride; + + std::string errorPrefix; }; REG_FACTORY_FOR(ReorgYoloImpl, ReorgYolo); diff --git a/inference-engine/src/mkldnn_plugin/nodes/reverse_sequence.cpp b/inference-engine/src/mkldnn_plugin/nodes/reverse_sequence.cpp index 663f4cdff7d..85ab2b8c414 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/reverse_sequence.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/reverse_sequence.cpp @@ -10,60 +10,87 @@ #include #include #include "ie_parallel.hpp" +#include + +using namespace MKLDNNPlugin; namespace InferenceEngine { namespace Extensions { namespace Cpu { class ReverseSequenceImpl: public ExtLayerBase { -public: - explicit ReverseSequenceImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 2 || layer->outData.size() != 1) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; + const auto revSeq = std::dynamic_pointer_cast(op); + if (!revSeq) { + errorMessage = "Only opset1 ReverseSequence operation is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; + } - src_dims = layer->insData[REVERSESEQUENCE_DATA].lock()->getTensorDesc().getDims(); + std::string errorPrefix; - Precision lengthsPrecision = layer->insData[REVERSESEQUENCE_LENGTHS].lock()->getTensorDesc().getPrecision(); +public: + explicit ReverseSequenceImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "ReverseSequence layer with name '" + op->get_friendly_name() + "'"; + const auto revSeq = std::dynamic_pointer_cast(op); + + if (op->get_input_size() != 2 || op->get_output_size() != 1) + IE_THROW() << errorPrefix << " has incorrect number of input/output edges!"; + + src_dims = op->get_input_shape(REVERSESEQUENCE_DATA); + + Precision lengthsPrecision = details::convertPrecision(op->get_input_element_type(REVERSESEQUENCE_LENGTHS)); if (lengthsPrecision != Precision::I32 && lengthsPrecision != Precision::FP32) lengthsPrecision = Precision::I32; - SizeVector seq_lengths_dims = layer->insData[REVERSESEQUENCE_LENGTHS].lock()->getTensorDesc().getDims(); - if (seq_lengths_dims.size() > 1) - IE_THROW() << layer->name << " Seq_lengths vector should be 1 dimension"; + SizeVector seq_lengths_dims = op->get_input_shape(REVERSESEQUENCE_LENGTHS); + if (seq_lengths_dims.size() != 1) + IE_THROW() << errorPrefix << " has incorrect 2nd input rank: " << seq_lengths_dims.size(); - SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims(); + SizeVector dst_dims = op->get_output_shape(0); if (src_dims.size() != dst_dims.size()) - IE_THROW() << layer->name << " Incorrect number of input/output sizes!"; + IE_THROW() << errorPrefix << " has incorrect number of input/output sizes!"; for (size_t i = 0; i < dst_dims.size(); i++) { if (src_dims[i] != dst_dims[i]) - IE_THROW() << layer->name << " Incorrect number of input/output dimension!"; + IE_THROW() << errorPrefix << " has incorrect number of input/output dimension!"; } - seq_axis = layer->GetParamAsInt("seq_axis", 1); - if (seq_axis < 0) - seq_axis += src_dims.size(); + seq_axis = revSeq->get_sequence_axis(); if (seq_axis < 0 || seq_axis >= static_cast(src_dims.size())) - IE_THROW() << layer->name << " Incorrect 'seq_axis' parameters dimensions and axis number!"; + IE_THROW() << errorPrefix << " has incorrect 'seq_axis' parameters dimensions and axis number!"; - batch_axis = layer->GetParamAsInt("batch_axis", 0); - if (batch_axis < 0) - batch_axis += src_dims.size(); + batch_axis = revSeq->get_batch_axis(); if (batch_axis < 0 || batch_axis >= static_cast(src_dims.size())) - IE_THROW() << layer->name << " Incorrect 'batch_axis' parameters dimensions and axis number!"; + IE_THROW() << errorPrefix << " has incorrect 'batch_axis' parameters dimensions and axis number!"; if (seq_lengths_dims[0] != dst_dims[batch_axis]) - IE_THROW() << layer->name << " Incorrect 'seq_lengths_dims' parameters dimension!"; + IE_THROW() << errorPrefix << " has incorrect 'seq_lengths_dims' parameters dimension!"; + + srcStrides.resize(src_dims.size()); + srcStrides[srcStrides.size() - 1] = 1; + for (int i = srcStrides.size() - 2; i >= 0; i--) { + srcStrides[i] = srcStrides[i + 1] * src_dims[i + 1]; + } - srcStrides = layer->insData[REVERSESEQUENCE_DATA].lock()->getTensorDesc().getBlockingDesc().getStrides(); work_amount_dst = srcStrides[0] * src_dims[0]; - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, lengthsPrecision) }, - { 
DataConfigurator(ConfLayout::PLN, Precision::FP32) }); + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, lengthsPrecision}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp index d372c85989a..102a9bbd6cb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp @@ -15,6 +15,9 @@ #include #include "ie_parallel.hpp" #include "common/cpu_memcpy.h" +#include + +using MKLDNNPlugin::TensorDescCreatorTypes; namespace InferenceEngine { namespace Extensions { @@ -318,19 +321,40 @@ private: const int OUTPUT_ROI_FEATURES {0}; const int OUTPUT_ROIS {1}; -public: - explicit ExperimentalDetectronROIFeatureExtractorImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - output_dim_ = layer->GetParamAsInt("output_size"); - pyramid_scales_ = layer->GetParamAsInts("pyramid_scales"); - sampling_ratio_ = layer->GetParamAsInt("sampling_ratio"); - aligned_ = layer->GetParamAsBool("aligned", false); + const auto roiFeatureExtractor = std::dynamic_pointer_cast(op); + if (!roiFeatureExtractor) { + errorMessage = "Only opset6 ExperimentalDetectronROIFeatureExtractor operation is supported"; + return false; + } + } catch (...) { + return false; + } + return true; + } + +public: + explicit ExperimentalDetectronROIFeatureExtractorImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + const auto roiFeatureExtractor = std::dynamic_pointer_cast(op); + const auto &attr = roiFeatureExtractor->get_attrs(); + output_dim_ = attr.output_size; + pyramid_scales_ = attr.pyramid_scales; + sampling_ratio_ = attr.sampling_ratio; + aligned_ = attr.aligned; pooled_height_ = output_dim_; pooled_width_ = output_dim_; - std::vector inputs_layouts(layer->insData.size(), DataConfigurator(ConfLayout::PLN, Precision::FP32)); - std::vector outputs_layouts(layer->outData.size(), DataConfigurator(ConfLayout::PLN, Precision::FP32)); - addConfig(layer, inputs_layouts, outputs_layouts); + std::vector inDataConfigurators(op->get_input_size(), DataConfigurator{TensorDescCreatorTypes::ncsp, Precision::FP32}); + addConfig(op, inDataConfigurators, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::FP32}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); } @@ -397,7 +421,7 @@ private: int output_dim_ = 0; int pooled_height_ = 0; int pooled_width_ = 0; - std::vector pyramid_scales_; + std::vector pyramid_scales_; int sampling_ratio_ = 0; bool aligned_ = false; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/select.cpp b/inference-engine/src/mkldnn_plugin/nodes/select.cpp deleted file mode 100644 index e23b32ab381..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/select.cpp +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class SelectImpl: public ExtLayerBase { - enum { CONDITION, THEN, ELSE, 
numOfInputs }; - enum { N, C, D, H, W, numOfDims }; - - std::string broadcast; - std::vector resDims; - std::vector resOffset; - std::vector condOffset; - std::vector thenOffset; - std::vector elseOffset; - -public: - explicit SelectImpl(const CNNLayer* layer) { - try { - if (layer->insData.size() != numOfInputs || layer->outData.size() != 1) - IE_THROW() << "Select layer with name '" << layer->name << "' has incorrect number of input/output edges!"; - - broadcast = layer->GetParamAsString("auto_broadcast", "numpy"); - - auto inputPrecision = layer->insData[THEN].lock()->getTensorDesc().getPrecision(); - if (inputPrecision == Precision::BF16 || layer->insData[ELSE].lock()->getTensorDesc().getPrecision() == Precision::BF16) { - inputPrecision = Precision::BF16; - } else if (layer->insData[THEN].lock()->getTensorDesc().getPrecision() != layer->insData[ELSE].lock()->getTensorDesc().getPrecision()) { - IE_THROW() << "Select layer with name '" << layer->name << "' has different precisions on 'Then' and 'Else' inputs "; - } - - const auto& conditionPrecision = layer->insData[CONDITION].lock()->getTensorDesc().getPrecision(); - if (conditionPrecision != Precision::BOOL && conditionPrecision != Precision::I32 && conditionPrecision != Precision::U8) - IE_THROW() << "Select layer with name '" << layer->name << "' has unsupported precision: " << conditionPrecision - << " on 'Condition' input"; - - const auto& inputPrecisionSize = layer->insData[THEN].lock()->getTensorDesc().getPrecision().size(); - if (inputPrecisionSize != 1 && inputPrecisionSize != 2 && inputPrecisionSize != 4 && inputPrecisionSize != 8) - IE_THROW() << "Select layer with name '" << layer->name << "' has unsupported precision: " << - layer->insData[THEN].lock()->getTensorDesc().getPrecision() << " on 'Then' and 'Else' inputs"; - - const auto &conditionShapes = layer->insData[CONDITION].lock()->getTensorDesc().getDims(); - const auto &thenShapes = layer->insData[THEN].lock()->getTensorDesc().getDims(); - const auto &elseShapes = layer->insData[ELSE].lock()->getTensorDesc().getDims(); - const auto &outputShapes = layer->outData[0]->getTensorDesc().getDims(); - - if (broadcast != "none" && broadcast != "numpy") - IE_THROW() << "Select layer with name '" << layer->name << "' has unsupported broadcast type: " << broadcast; - - if (broadcast == "none" && ((conditionShapes != outputShapes) || (thenShapes != outputShapes) || (elseShapes != outputShapes))) - IE_THROW() << "Select layer with name '" << layer->name << "' and auto_broadcast='none' has input shapes mismatch"; - - if (broadcast == "numpy") { - if (outputShapes.size() < conditionShapes.size() || outputShapes.size() < thenShapes.size() || outputShapes.size() < elseShapes.size()) - IE_THROW() << "Select layer with name '" << layer->name << "' and auto_broadcast='numpy' has incompatible input and output shapes"; - - for (int condIt = conditionShapes.size() - 1, outIt = outputShapes.size() - 1; condIt >= 0; condIt--, outIt--) - if (conditionShapes[condIt] != outputShapes[outIt] && conditionShapes[condIt] != 1) - IE_THROW() << "Select layer with name '" << layer->name - << "' and auto_broadcast='numpy' has incompatible 'Condition' input and output shapes"; - - for (int thenIt = thenShapes.size() - 1, outIt = outputShapes.size() - 1; thenIt >= 0; thenIt--, outIt--) - if (thenShapes[thenIt] != outputShapes[outIt] && thenShapes[thenIt] != 1) - IE_THROW() << "Select layer with name '" << layer->name - << "' and auto_broadcast='numpy' has incompatible 'Then' input and output shapes"; - - - 
for (int elseIt = elseShapes.size() - 1, outIt = outputShapes.size() - 1; elseIt >= 0; elseIt--, outIt--) - if (elseShapes[elseIt] != outputShapes[outIt] && elseShapes[elseIt] != 1) - IE_THROW() << "Select layer with name '" << layer->name - << "' and auto_broadcast='numpy' has incompatible 'Else' input and output shapes"; - } - - resDims.resize(numOfDims, 1); - std::copy(std::begin(outputShapes), std::end(outputShapes), std::begin(resDims) + (numOfDims - outputShapes.size())); - if (broadcast == "numpy") { - calcOutOffset(resOffset, resDims); - - std::vector condDims(numOfDims, 1); - std::copy(std::begin(conditionShapes), std::end(conditionShapes), std::begin(condDims) + (numOfDims - conditionShapes.size())); - calcInOffset(condOffset, condDims, resDims); - - std::vector thenDims(numOfDims, 1); - std::copy(std::begin(thenShapes), std::end(thenShapes), std::begin(thenDims) + (numOfDims - thenShapes.size())); - calcInOffset(thenOffset, thenDims, resDims); - - std::vector elseDims(numOfDims, 1); - std::copy(std::begin(elseShapes), std::end(elseShapes), std::begin(elseDims) + (numOfDims - elseShapes.size())); - calcInOffset(elseOffset, elseDims, resDims); - } - - LayerConfig config; - for (size_t i = 0; i < numOfInputs; i++) { - DataConfig inConfig; - inConfig.inPlace = -1; - inConfig.constant = false; - - Precision inPrecision = i == CONDITION ? conditionPrecision : inputPrecision; - const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims(); - inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims)); - - config.inConfs.push_back(inConfig); - } - - DataConfig outConfig; - outConfig.inPlace = -1; - outConfig.constant = false; - const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims(); - outConfig.desc = TensorDesc(inputPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims)); - config.outConfs.push_back(outConfig); - - config.dynBatchSupport = false; - confs.push_back(config); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - auto &outputData = outputs[0]; - const size_t condPrecSize = inputs[CONDITION]->getTensorDesc().getPrecision().size(); - const size_t inputsPrecSize = inputs[THEN]->getTensorDesc().getPrecision().size(); - - switch (condPrecSize) { - case 1: { - switch (inputsPrecSize) { - case 1: { execute_impl(inputs, outputData); break; } - case 2: { execute_impl(inputs, outputData); break; } - case 4: { execute_impl(inputs, outputData); break; } - case 8: { execute_impl(inputs, outputData); break; } - default: { - if (resp) { - std::string errorMsg = "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(inputs[THEN]->getTensorDesc().getPrecision().name()); - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - } - break; - } - case 4: { - switch (inputsPrecSize) { - case 1: { execute_impl(inputs, outputData); break; } - case 2: { execute_impl(inputs, outputData); break; } - case 4: { execute_impl(inputs, outputData); break; } - case 8: { execute_impl(inputs, outputData); break; } - default: { - if (resp) { - std::string errorMsg = "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(inputs[THEN]->getTensorDesc().getPrecision().name()); - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - } - break; - } - default: { - if (resp) { - 
std::string errorMsg = "Select layer doesn't support 'Condition' inputs' precision: " - + std::string(inputs[CONDITION]->getTensorDesc().getPrecision().name()); - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - } - - return OK; - } - -private: - void calcOutOffset(std::vector& offset, const std::vector& dims) { - offset.resize(numOfDims); - int k = 1; - for (int i = dims.size() - 1; i >= 0; i--) { - offset[i] = k; - k *= dims[i]; - } - } - - void calcInOffset(std::vector& offset, const std::vector& inDims, const std::vector& outDims) { - offset.resize(numOfDims); - int k = 1; - for (int i = inDims.size() - 1; i >= 0; i--) { - offset[i] = (inDims[i] == outDims[i]) ? k : 0; - k *= inDims[i]; - } - } - - template - void execute_impl(std::vector& inputs, Blob::Ptr& output) noexcept { - auto *conditionData = inputs[CONDITION]->cbuffer().as() + inputs[CONDITION]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - auto *thenData = inputs[THEN]->cbuffer().as() + inputs[THEN]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - auto *elseData = inputs[ELSE]->cbuffer().as() + inputs[ELSE]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - auto *dstData = output->buffer().as() + output->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - if (broadcast == "none") { - size_t dstDataSize = std::accumulate(begin(resDims), end(resDims), 1, std::multiplies()); - parallel_for(dstDataSize, [&](size_t i) { - dstData[i] = conditionData[i] ? thenData[i] : elseData[i]; - }); - } else { - parallel_for4d(resDims[N], resDims[C], resDims[D], resDims[H], [&](int b, int c, int d, int h) { - for (int w = 0; w < resDims[W]; w++) { - size_t indexOut = b * resOffset[N] + c * resOffset[C] + d * resOffset[D] + h * resOffset[H] + w * resOffset[W]; - size_t indexCond = b * condOffset[N] + c * condOffset[C] + d * condOffset[D] + h * condOffset[H] + w * condOffset[W]; - size_t indexThen = b * thenOffset[N] + c * thenOffset[C] + d * thenOffset[D] + h * thenOffset[H] + w * thenOffset[W]; - size_t indexElse = b * elseOffset[N] + c * elseOffset[C] + d * elseOffset[D] + h * elseOffset[H] + w * elseOffset[W]; - dstData[indexOut] = conditionData[indexCond] ? 
thenData[indexThen] : elseData[indexElse]; - } - }); - } - } -}; - -REG_FACTORY_FOR(SelectImpl, Select); -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp b/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp index e606655a00a..bec57b38a39 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp @@ -11,11 +11,15 @@ #include #include "ie_parallel.hpp" #include "common/cpu_memcpy.h" +#include +#include "common/tensor_desc_creator.h" namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + class ShuffleChannelsImpl: public ExtLayerBase { #define CNTR_SIZE 3 @@ -50,65 +54,67 @@ __inline size_t updater(size_t idx, size_t size, size_t* counters, size_t* own_d } public: - explicit ShuffleChannelsImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; + auto scOp = ngraph::as_type_ptr(op); + if (!scOp) { + errorMessage = "Node is not an instance of the TopK from the operations set v1."; + return false; + } - SizeVector src_dims = layer->insData[0].lock()->getTensorDesc().getDims(); - SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims(); - if (src_dims.size() != dst_dims.size()) - IE_THROW() << layer->name << " Incorrect number of input/output dimensions!"; + if (_supported_precisions_sizes.find(op->get_input_element_type(0).size()) == _supported_precisions_sizes.end()) { + errorMessage = "Unsupported precision: " + op->get_input_element_type(0).get_type_name(); + return false; + } + } catch (...) 
{ + return false; + } + return true; + } - const auto precision = layer->insData[0].lock()->getTensorDesc().getPrecision(); - if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end()) - IE_THROW() << layer->name << "has unsupported precision: " << precision.name(); + explicit ShuffleChannelsImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + auto scOp = ngraph::as_type_ptr(op); + auto& dstDims = op->get_output_shape(0); - int axis = layer->GetParamAsInt("axis", 1); + int64_t axis = scOp->get_axis(); if (axis < 0) - axis += dst_dims.size(); + axis += dstDims.size(); - if (axis < 0 || axis >= static_cast(dst_dims.size())) - IE_THROW() << layer->name << " Incorrect input parameters dimensions and axis number!"; + if (axis < 0 || axis >= static_cast(dstDims.size())) + IE_THROW() << op->get_friendly_name() << " Incorrect input parameters dimensions and axis number!"; - size_t group = layer->GetParamAsUInt("group", 1); - if (group == 0 || dst_dims[axis] % group) - IE_THROW() << layer->name << " Group parameter must evenly divide the channel dimension!"; + size_t group = scOp->get_group(); + if (group == 0 || dstDims[axis] % group) + IE_THROW() << op->get_friendly_name() << " Group parameter must evenly divide the channel dimension!"; // Find number of dictionaries, index range and data length own_dims[0] = 1; for (int i = 0; i < axis; i++) - own_dims[0] *= dst_dims[i]; + own_dims[0] *= dstDims[i]; - for (size_t i = axis + 1; i < dst_dims.size(); i++) - dataLength *= dst_dims[i]; + for (size_t i = axis + 1; i < dstDims.size(); i++) + dataLength *= dstDims[i]; if (dataLength == 0) - IE_THROW() << layer->name << " Incorrect input parameters dimension!"; + IE_THROW() << op->get_friendly_name() << " Incorrect input parameters dimension!"; - own_dims[1] = dst_dims[axis] / group; + own_dims[1] = dstDims[axis] / group; own_dims[2] = group; - ownStrides[0] = dst_dims[axis]; + ownStrides[0] = dstDims[axis]; ownStrides[1] = 1; ownStrides[2] = own_dims[1]; work_amount_dst = ownStrides[0] * own_dims[0]; - LayerConfig config; - DataConfig inConfig; - inConfig.desc = layer->insData[0].lock()->getTensorDesc(); - - config.inConfs.push_back(inConfig); - - DataConfig outConfig; - outConfig.desc = layer->outData[0]->getTensorDesc(); - outConfig.desc.setPrecision(inConfig.desc.getPrecision()); - outConfig.desc.setLayout(inConfig.desc.getLayout()); - config.outConfs.push_back(outConfig); - - config.dynBatchSupport = false; - confs.push_back(config); + addConfig(op, {{TensorDescCreatorTypes::ncsp, details::convertPrecision(op->get_input_element_type(0))}}, + {{TensorDescCreatorTypes::ncsp, details::convertPrecision(op->get_input_element_type(0))}}); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } @@ -146,9 +152,9 @@ public: template void process_data(std::vector& inputs, std::vector& outputs) noexcept { const T* src_data = inputs[0]->cbuffer().as() + - inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); T* dst_data = outputs[0]->cbuffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); if (dataLength > 1) { // Vectorized & Parallel diff --git a/inference-engine/src/mkldnn_plugin/nodes/simplernms.cpp b/inference-engine/src/mkldnn_plugin/nodes/simplernms.cpp deleted 
file mode 100644 index de05e2403e9..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/simplernms.cpp +++ /dev/null @@ -1,338 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -struct simpler_nms_roi_t { - float x0, y0, x1, y1; - - static inline float clamp_v(const float v, const float v_min, const float v_max) { - return (std::max)(v_min, (std::min)(v, v_max)); - } - - float area() const { return std::max(0, y1 - y0 + 1) * std::max(0, x1 - x0 + 1); } - - simpler_nms_roi_t intersect(simpler_nms_roi_t other) const { - return { - (std::max)(x0, other.x0), - (std::max)(y0, other.y0), - (std::min)(x1, other.x1), - (std::min)(y1, other.y1) - }; - } - simpler_nms_roi_t clamp(simpler_nms_roi_t other) const { - return { - clamp_v(x0, other.x0, other.x1), - clamp_v(y0, other.y0, other.y1), - clamp_v(x1, other.x0, other.x1), - clamp_v(y1, other.y0, other.y1) - }; - } -}; - -struct simpler_nms_delta_t { float shift_x, shift_y, log_w, log_h; }; -struct simpler_nms_proposal_t { simpler_nms_roi_t roi; float confidence; size_t ord; }; -struct simpler_nms_anchor { float start_x; float start_y; float end_x; float end_y; }; - - -static void CalcBasicParams(const simpler_nms_anchor& base_anchor, - float& width, float& height, float& x_center, float& y_center) { - width = base_anchor.end_x - base_anchor.start_x + 1.0f; - height = base_anchor.end_y - base_anchor.start_y + 1.0f; - - x_center = base_anchor.start_x + 0.5f * (width - 1.0f); - y_center = base_anchor.start_y + 0.5f * (height - 1.0f); -} - - -static void MakeAnchors(const std::vector& ws, const std::vector& hs, - float x_center, float y_center, std::vector& anchors) { - unsigned int len = ws.size(); - anchors.clear(); - anchors.resize(len); - - for (unsigned int i = 0 ; i < len ; i++) { - // transpose to create the anchor - anchors[i].start_x = x_center - 0.5f * (ws[i] - 1.0f); - anchors[i].start_y = y_center - 0.5f * (hs[i] - 1.0f); - anchors[i].end_x = x_center + 0.5f * (ws[i] - 1.0f); - anchors[i].end_y = y_center + 0.5f * (hs[i] - 1.0f); - } -} - - -static void CalcAnchors(const simpler_nms_anchor& base_anchor, const std::vector& scales, - std::vector& anchors) { - float width = 0.0f, height = 0.0f, x_center = 0.0f, y_center = 0.0f; - - CalcBasicParams(base_anchor, width, height, x_center, y_center); - - unsigned int num_scales = scales.size(); - std::vector ws(num_scales), hs(num_scales); - - for (unsigned int i = 0 ; i < num_scales ; i++) { - ws[i] = width * scales[i]; - hs[i] = height * scales[i]; - } - - MakeAnchors(ws, hs, x_center, y_center, anchors); -} - - -static void CalcRatioAnchors(const simpler_nms_anchor& base_anchor, const std::vector& ratios, - std::vector& ratio_anchors) { - float width = 0.0f, height = 0.0f, x_center = 0.0f, y_center = 0.0f; - - CalcBasicParams(base_anchor, width, height, x_center, y_center); - - float size = width * height; - - unsigned int num_ratios = ratios.size(); - - std::vector ws(num_ratios), hs(num_ratios); - - for (unsigned int i = 0 ; i < num_ratios ; i++) { - float new_size = size / ratios[i]; - ws[i] = round(sqrt(new_size)); - hs[i] = round(ws[i] * ratios[i]); - } - - MakeAnchors(ws, hs, x_center, y_center, ratio_anchors); -} - -void GenerateAnchors(unsigned int base_size, const std::vector& ratios, - const std::vector scales, simpler_nms_anchor *anchors) { - float end = static_cast(base_size - 1); // 
because we start at zero - - simpler_nms_anchor base_anchor = {0.0f, 0.0f, end, end}; - - std::vector ratio_anchors; - CalcRatioAnchors(base_anchor, ratios, ratio_anchors); - - for (size_t i = 0, index = 0; i < ratio_anchors.size() ; i++) { - std::vector temp_anchors; - CalcAnchors(ratio_anchors[i], scales, temp_anchors); - - for (size_t j = 0 ; j < temp_anchors.size() ; j++) { - anchors[index++] = temp_anchors[j]; - } - } -} - -std::vector simpler_nms_perform_nms( - const std::vector& proposals, - float iou_threshold, - size_t top_n) { - std::vector res; - res.reserve(top_n); - for (const auto & prop : proposals) { - const auto bbox = prop.roi; - const float area = bbox.area(); - - // For any realistic WL, this condition is true for all top_n values anyway - if (prop.confidence > 0) { - bool overlaps = std::any_of(res.begin(), res.end(), [&](const simpler_nms_roi_t& res_bbox) { - float interArea = bbox.intersect(res_bbox).area(); - float unionArea = res_bbox.area() + area - interArea; - return interArea > iou_threshold * unionArea; - }); - - if (!overlaps) { - res.push_back(bbox); - if (res.size() == top_n) break; - } - } - } - - return res; -} - -inline void sort_and_keep_at_most_top_n( - std::vector& proposals, - size_t top_n) { - const auto cmp_fn = [](const simpler_nms_proposal_t& a, - const simpler_nms_proposal_t& b) { - return a.confidence > b.confidence || (a.confidence == b.confidence && a.ord > b.ord); - }; - - if (proposals.size() > top_n) { - std::partial_sort(proposals.begin(), proposals.begin() + top_n, proposals.end(), cmp_fn); - proposals.resize(top_n); - } else { - std::sort(proposals.begin(), proposals.end(), cmp_fn); - } -} - -inline simpler_nms_roi_t simpler_nms_gen_bbox( - const simpler_nms_anchor& box, - const simpler_nms_delta_t& delta, - int anchor_shift_x, - int anchor_shift_y) { - auto anchor_w = box.end_x - box.start_x + 1; - auto anchor_h = box.end_y - box.start_y + 1; - auto center_x = box.start_x + anchor_w * .5f; - auto center_y = box.start_y + anchor_h *.5f; - - float pred_center_x = delta.shift_x * anchor_w + center_x + anchor_shift_x; - float pred_center_y = delta.shift_y * anchor_h + center_y + anchor_shift_y; - float half_pred_w = exp(delta.log_w) * anchor_w * .5f; - float half_pred_h = exp(delta.log_h) * anchor_h * .5f; - - return { pred_center_x - half_pred_w, - pred_center_y - half_pred_h, - pred_center_x + half_pred_w, - pred_center_y + half_pred_h }; -} - -class SimplerNMSImpl : public ExtLayerBase { -public: - explicit SimplerNMSImpl(const CNNLayer *layer) { - try { - if (layer->insData.size() != 3 || layer->outData.size() != 1) - IE_THROW() << "Incorrect number of input/output edges!"; - - if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4) - IE_THROW() << "SimplerNMS supports only 4D blobs!"; - - min_box_size_ = layer->GetParamAsInt("min_bbox_size"); - feat_stride_ = layer->GetParamAsInt("feat_stride"); - pre_nms_topn_ = layer->GetParamAsInt("pre_nms_topn"); - post_nms_topn_ = layer->GetParamAsInt("post_nms_topn"); - iou_threshold_ = layer->GetParamAsFloat("iou_threshold"); - scales = layer->GetParamAsFloats("scale", {}); - - unsigned int default_size = 16; - - ratios = {0.5f, 1.0f, 2.0f}; - - anchors_.resize(ratios.size() * scales.size()); - simpler_nms_anchor *anchors = &anchors_[0]; - - GenerateAnchors(default_size, ratios, scales, anchors); - - // Fill config information - if (layer->outData[0]->getTensorDesc().getDims().size() != 2 || - layer->insData[0].lock()->getTensorDesc().getDims().size() != 4) - IE_THROW() << 
"Unsupported dimensions!"; - - addConfig(layer, {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32)}, {DataConfigurator(ConfLayout::PLN, Precision::FP32)}); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector &inputs, std::vector &outputs, - ResponseDesc *resp) noexcept override { - int cls_idx = 0; - int delta_idx = 1; - - Blob::Ptr src_cls = inputs[cls_idx]; - Blob::Ptr src_delta = inputs[delta_idx]; - - if (src_cls->getTensorDesc().getDims()[1] > src_delta->getTensorDesc().getDims()[1]) { - cls_idx = 1; - delta_idx = 0; - - src_cls = inputs[cls_idx]; - src_delta = inputs[delta_idx]; - } - - int anchors_num = 3 * 3; - const auto * anchors = (const simpler_nms_anchor*)&anchors_[0]; - - int H = src_cls->getTensorDesc().getDims()[2]; - int W = src_cls->getTensorDesc().getDims()[3]; - - int SZ = H * W; - - float *dst = outputs[0]->buffer().as(); - - const float* cls_scores = src_cls->buffer().as(); - const float* delta_pred = src_delta->buffer().as(); - const float* im_info = inputs[2]->buffer().as(); - - int IW = static_cast(im_info[1]); - int IH = static_cast(im_info[0]); - int IS = static_cast(im_info[2]); - - int scaled_min_bbox_size = min_box_size_ * IS; - - std::vector sorted_proposals_confidence; - - for (auto y = 0; y < H; ++y) { - int anchor_shift_y = y * feat_stride_; - - for (auto x = 0; x < W; ++x) { - int anchor_shift_x = x * feat_stride_; - int location_index = y * W + x; - - // we assume proposals are grouped by window location - for (int anchor_index = 0; anchor_index < anchors_num ; anchor_index++) { - float dx0 = delta_pred[location_index + SZ * (anchor_index * 4 + 0)]; - float dy0 = delta_pred[location_index + SZ * (anchor_index * 4 + 1)]; - float dx1 = delta_pred[location_index + SZ * (anchor_index * 4 + 2)]; - float dy1 = delta_pred[location_index + SZ * (anchor_index * 4 + 3)]; - - simpler_nms_delta_t bbox_delta { dx0, dy0, dx1, dy1 }; - - float proposal_confidence = - cls_scores[location_index + SZ * (anchor_index + anchors_num * 1)]; - - simpler_nms_roi_t tmp_roi = simpler_nms_gen_bbox(anchors[anchor_index], bbox_delta, anchor_shift_x, anchor_shift_y); - simpler_nms_roi_t roi = tmp_roi.clamp({ 0, 0, static_cast(IW - 1), static_cast(IH - 1)}); - - int bbox_w = static_cast(roi.x1 - roi.x0) + 1; - int bbox_h = static_cast(roi.y1 - roi.y0) + 1; - - if (bbox_w >= scaled_min_bbox_size && bbox_h >= scaled_min_bbox_size) { - simpler_nms_proposal_t proposal { roi, proposal_confidence, sorted_proposals_confidence.size() }; - sorted_proposals_confidence.push_back(proposal); - } - } - } - } - - sort_and_keep_at_most_top_n(sorted_proposals_confidence, pre_nms_topn_); - auto res = simpler_nms_perform_nms(sorted_proposals_confidence, iou_threshold_, post_nms_topn_); - - size_t res_num_rois = res.size(); - - for (size_t i = 0; i < res_num_rois; ++i) { - dst[5 * i + 0] = 0; // roi_batch_ind, always zero on test time - dst[5 * i + 1] = res[i].x0; - dst[5 * i + 2] = res[i].y0; - dst[5 * i + 3] = res[i].x1; - dst[5 * i + 4] = res[i].y1; - } - return OK; - } - -private: - int min_box_size_; - int feat_stride_; - int pre_nms_topn_; - int post_nms_topn_; - float iou_threshold_; - - std::vector scales; - std::vector ratios; - - std::vector anchors_; -}; - -REG_FACTORY_FOR(SimplerNMSImpl, SimplerNMS); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git 
a/inference-engine/src/mkldnn_plugin/nodes/space_to_batch.cpp b/inference-engine/src/mkldnn_plugin/nodes/space_to_batch.cpp deleted file mode 100644 index 24038d430bf..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/space_to_batch.cpp +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" -#include "ie_parallel.hpp" - -#include -#include -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class SpaceToBatchImpl: public ExtLayerBase { -public: - explicit SpaceToBatchImpl(const CNNLayer* layer) { - try { - auto spaceToBatchLayer = dynamic_cast(layer); - if (!spaceToBatchLayer) - IE_THROW() << "SpaceToBatch layer with name '" << layer->name << "' isn't instance of SpaceToBatchLayer class"; - - if (spaceToBatchLayer->insData.size() != 4) - IE_THROW() << "SpaceToBatch layer with name '" << spaceToBatchLayer->name << "' has incorrect number of input edges"; - - if (spaceToBatchLayer->outData.size() != 1) - IE_THROW() << "SpaceToBatch layer with name '" << spaceToBatchLayer->name << "' has incorrect number of output edges"; - - auto data = spaceToBatchLayer->insData[0].lock(); - if (!data) - IE_THROW() << "SpaceToBatch layer with name '" << spaceToBatchLayer->name << "' has nullable input data"; - - inDims = data->getTensorDesc().getDims(); - if (inDims.size() < 4) - IE_THROW() << "SpaceToBatch layer with name '" << spaceToBatchLayer->name << "' doesn't support dimensions with rank less than 4"; - - if (inDims.size() > 5) - IE_THROW() << "SpaceToBatch layer with name '" << spaceToBatchLayer->name << "' doesn't support dimensions with rank greater than 5"; - - outDims = spaceToBatchLayer->outData[0]->getTensorDesc().getDims(); - if (inDims.size() != outDims.size()) - IE_THROW() << "SpaceToBatch layer with name '" << spaceToBatchLayer->name << "' has incorrect number of input/output dimensions"; - - const auto precision = data->getTensorDesc().getPrecision(); - const std::set supported_precision_sizes = {1, 2, 4, 8}; - if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) - IE_THROW() << "SpaceToBatch layer with name '" << spaceToBatchLayer->name << "' has unsupported precision: " << precision.name(); - - blockShapeIn = spaceToBatchLayer->_block_shape; - padsBeginIn = spaceToBatchLayer->_pads_begin; - - auto createConfig = [&](Layout layout) { - LayerConfig config; - // TODO: remove Const layers - for (int i = 0; i < spaceToBatchLayer->insData.size(); i++) { - auto inData = spaceToBatchLayer->insData[i].lock(); - if (!inData) - IE_THROW() << "SpaceToBatch layer with name '" << spaceToBatchLayer->name << "' has nullable input data"; - DataConfig inConfig; - if (i == 0) - inConfig.desc = TensorDesc(precision, inData->getTensorDesc().getDims(), layout); - else - inConfig.desc = TensorDesc(inData->getPrecision(), inData->getTensorDesc().getDims(), inData->getTensorDesc().getLayout()); - config.inConfs.push_back(inConfig); - } - - DataConfig outConfig; - outConfig.desc = TensorDesc(precision, outDims, layout); - config.outConfs.push_back(outConfig); - - config.dynBatchSupport = false; - confs.push_back(config); - }; - - createConfig(inDims.size() == 4 ? 
NHWC : NDHWC); - createConfig(TensorDesc::getLayoutByDims(inDims)); - - std::vector> blockConfs { }; - if (inDims[1] % 8 == 0) blockConfs.push_back({ConfLayout::BLK8, ConfLayout::BLK8}); - if (inDims[1] % 16 == 0) blockConfs.push_back({ConfLayout::BLK16, ConfLayout::BLK16}); - for (auto conf : blockConfs) { - addConfig(layer, {DataConfigurator(conf.first, precision), - DataConfigurator(ConfLayout::PLN, spaceToBatchLayer->insData[1].lock()->getPrecision()), - DataConfigurator(ConfLayout::PLN, spaceToBatchLayer->insData[2].lock()->getPrecision()), - DataConfigurator(ConfLayout::PLN, spaceToBatchLayer->insData[3].lock()->getPrecision())}, - {DataConfigurator(conf.second, precision)}); - } - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - switch (inputs[0]->getTensorDesc().getPrecision().size()) { - case 1: spaceToBatchKernel::value_type> (inputs, outputs); break; - case 2: spaceToBatchKernel::value_type>(inputs, outputs); break; - case 4: spaceToBatchKernel::value_type>(inputs, outputs); break; - default: { - if (resp) { - std::string errorMsg = "SpaceToBatch layer with name does not support precision '" - + std::string(inputs[0]->getTensorDesc().getPrecision().name()) + "'"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - - return GENERAL_ERROR; - } - } - } - - return OK; - } - -private: - std::vector getShape5D(const SizeVector &shape) { - std::vector shape5D(5, 1); - for (int i = 0; i < 2; i++) { - shape5D[i] = shape[i]; - shape5D[4 - i] = shape[shape.size() - 1 - i]; - } - shape5D[2] = shape.size() == 5 ? shape[2] : shape5D[2]; - return shape5D; - } - - template - void spaceToBatchKernel(std::vector& inputs, std::vector& outputs) noexcept { - const T *srcData = inputs[0]->cbuffer().as() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - T *dstData = outputs[0]->buffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - const auto layout = inputs[0]->getTensorDesc().getLayout(); - const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; - const auto dimsSize = inDims.size(); - - auto inShape5D = getShape5D(outDims); - auto outShape5D = getShape5D(inDims); - auto blockShape = getShape5D(blockShapeIn); - - if (layout == NHWC || layout == NDHWC) { - inShape5D.push_back(inShape5D[1]); - inShape5D.erase(inShape5D.begin() + 1); - outShape5D.push_back(outShape5D[1]); - outShape5D.erase(outShape5D.begin() + 1); - blockShape.push_back(blockShape[1]); - blockShape.erase(blockShape.begin() + 1); - } - - const size_t blockSize = blocked ? outputs[0]->getTensorDesc().getBlockingDesc().getBlockDims().back() : 1lu; - const size_t blockCountInput = outputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1]; - const size_t blockCountOutput = inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1]; - const auto blockRemainder = inShape5D[1] % blockSize; - const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; - - const size_t inSpatialStep = inShape5D[2] * inShape5D[3] * inShape5D[4]; - const size_t inBatchStep = (blocked ? blockSize * blockCountInput : inShape5D[1]) * inSpatialStep; - - const size_t outSpatialStep = outShape5D[2] * outShape5D[3] * outShape5D[4]; - const size_t outBatchStep = (blocked ? 
blockSize * blockCountOutput : outShape5D[1]) * outSpatialStep; - - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t start(0lu), end(0lu); - splitter(inShape5D[0] * inBatchStep, nthr, ithr, start, end); - std::fill(dstData + start, dstData + end, T(0)); - }); - - size_t channels = (inShape5D[1] / blockSize); - channels = channels == 0 ? 1 : channels; - const size_t workAmount = inShape5D[0] * channels; - - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t start(0lu), end(0lu); - splitter(workAmount, nthr, ithr, start, end); - std::vector indxStart(2, 0); - std::vector indxEnd(2, 0); - parallel_it_init(start, indxStart[0], inShape5D[0], indxStart[1], channels); - parallel_it_init((end - 1), indxEnd[0], inShape5D[0], indxEnd[1], channels); - std::vector oAdd(5, 1); - std::vector begin(5, 0); - std::vector finish(5, 1); - for (size_t i0 = indxStart[0]; i0 < indxEnd[0] + 1; ++i0) { - int64_t bIdx = i0 / outShape5D[0]; - const size_t srcIdx0 = (i0 - (bIdx * outShape5D[0])) * outBatchStep; - const size_t dstIdx0 = i0 * inBatchStep; - oAdd[4] = bIdx % blockShapeIn[dimsSize - 1] - padsBeginIn[dimsSize - 1]; - bIdx /= blockShapeIn[dimsSize - 1]; - oAdd[3] = bIdx % blockShapeIn[dimsSize - 2] - padsBeginIn[dimsSize - 2]; - bIdx /= blockShapeIn[dimsSize - 2]; - oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu; - bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx; - oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1]; - if (layout == NHWC || layout == NDHWC) { - oAdd.push_back(oAdd[1]); - oAdd.erase(oAdd.begin() + 1); - } - begin[1] = (blockShape[1] - 1 - oAdd[1]) / blockShape[1] / blockSize; - finish[1] = (outShape5D[1] - 1 - oAdd[1]) / blockShape[1] / blockSize; - begin[2] = (blockShape[2] - 1 - oAdd[2]) / blockShape[2]; - finish[2] = (outShape5D[2] - 1 - oAdd[2]) / blockShape[2]; - begin[3] = (blockShape[3] - 1 - oAdd[3]) / blockShape[3]; - finish[3] = (outShape5D[3] - 1 - oAdd[3]) / blockShape[3]; - begin[4] = (blockShape[4] - 1 - oAdd[4]) / blockShape[4]; - finish[4] = (outShape5D[4] - 1 - oAdd[4]) / blockShape[4]; - const int64_t addTmpOC = blocked ? 0lu : oAdd[1]; - const int64_t addTmpOc = blocked ? oAdd[1] : 0lu; - indxStart[1] = begin[1] > indxStart[1] ? begin[1] : indxStart[1]; - const size_t lastI1 = i0 == indxEnd[0] ? (indxEnd[1] > finish[1] ? finish[1] : indxEnd[1]) : finish[1]; - for (; indxStart[1] < lastI1 + 1; ++indxStart[1]) { - const size_t block = indxStart[1] == finish[1] ? lastBlock : blockSize; - const int64_t tmpOC = indxStart[1] * blockShape[1] + addTmpOC; - const size_t srcIdx1 = srcIdx0 + tmpOC * outSpatialStep * blockSize; - const size_t dstIdx1 = dstIdx0 + indxStart[1] * inSpatialStep * blockSize; - const size_t itEnd = blocked ? 
((block - 1) * blockShape[1] + oAdd[1]) / blockSize : 0lu; - for (size_t i2 = begin[2]; i2 < finish[2] + 1; ++i2) { - const int64_t tmpOd = i2 * blockShape[2] + oAdd[2]; - const size_t srcIdx2 = srcIdx1 + tmpOd * outShape5D[3] * outShape5D[4] * blockSize; - const size_t dstIdx2 = dstIdx1 + i2 * inShape5D[3] * inShape5D[4] * blockSize; - for (size_t i3 = begin[3]; i3 < finish[3] + 1; ++i3) { - const int64_t tmpOh = i3 * blockShape[3] + oAdd[3]; - const size_t srcIdx3 = srcIdx2 + tmpOh * outShape5D[4] * blockSize; - const size_t dstIdx3 = dstIdx2 + i3 * inShape5D[4] * blockSize; - for (size_t i4 = begin[4]; i4 < finish[4] + 1; ++i4) { - const int64_t tmpOw = i4 * blockShape[4] + oAdd[4]; - const size_t srcIdx4 = srcIdx3 + tmpOw * blockSize; - const size_t dstIdx4 = dstIdx3 + i4 * blockSize; - for (size_t it = 0; it < itEnd + 1; ++it) { - const size_t i5Begin = it == 0 ? 0 : (it * blockSize - 1 - oAdd[1]) / blockShape[1] + 1; - const size_t i5End = it == itEnd ? (block - 1) : ((it + 1) * blockSize - 1 - oAdd[1]) / blockShape[1]; - for (size_t i5 = i5Begin; i5 < i5End + 1; ++i5) { - const int64_t tmpOc = i5 * blockShape[1] + addTmpOc; - const size_t srcIdx5 = srcIdx4 + it * outSpatialStep * blockSize + (tmpOc - it * blockSize); - const size_t dstIdx5 = dstIdx4 + i5; - dstData[dstIdx5] = srcData[srcIdx5]; - } - } - } - } - } - } - indxStart[1] = 0lu; - } - }); - } - - SizeVector inDims; - SizeVector outDims; - std::vector blockShapeIn; - std::vector padsBeginIn; -}; - -REG_FACTORY_FOR(SpaceToBatchImpl, SpaceToBatch); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine - diff --git a/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp b/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp deleted file mode 100644 index 907f1bdd7e7..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp +++ /dev/null @@ -1,227 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class SparseFillEmptyRowsImpl : public ExtLayerBase { -public: - explicit SparseFillEmptyRowsImpl(const CNNLayer* layer) { - try { - if (layer->insData.size() != 4 || layer->outData.size() != 3) { - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - } - - // check dimensions of input tensors - SizeVector input_indices_dims = layer->insData[INPUT_INDICES_PORT].lock()->getTensorDesc().getDims(); - if (input_indices_dims.size() != 2 || input_indices_dims[1] != 2) { - IE_THROW() << layer->name << " Incorrect dimensions for input indices. It must be Nx2 dimension tensor."; - } - SizeVector input_values_dims = layer->insData[INPUT_VALUES_PORT].lock()->getTensorDesc().getDims(); - if (input_values_dims.size() != 1) { - IE_THROW() << layer->name << " Incorrect dimensions for input values. 
It must be N dimension tensor."; - } - if (input_indices_dims[0] != input_values_dims[0]) { - IE_THROW() << layer->name << " Mismatch of the first dimensions of input indices and values."; - } - SizeVector input_dense_shape_dims = layer->insData[INPUT_DENSE_SHAPE_PORT].lock()->getTensorDesc().getDims(); - if (input_dense_shape_dims.size() != 1 || input_dense_shape_dims[0] != 2) { - IE_THROW() << layer->name << " Incorrect dimensions for input dense shape."; - } - SizeVector input_default_value_dims = layer->insData[INPUT_DEFAULT_VALUE_PORT].lock()->getTensorDesc().getDims(); - if (input_default_value_dims[0] != 1) { - IE_THROW() << layer->name << " Incorrect dimensions for input dense shape."; - } - inMaxNumValues = input_indices_dims[0]; - - // check dimensions of output tensors - SizeVector output_indices_dims = layer->outData[OUTPUT_INDICES_PORT]->getTensorDesc().getDims(); - if (output_indices_dims.size() != 2 || output_indices_dims[1] != 2) { - IE_THROW() << layer->name << " Incorrect dimensions for output indices. It must be Nx2 dimension tensor."; - } - SizeVector output_values_dims = layer->outData[OUTPUT_VALUES_PORT]->getTensorDesc().getDims(); - if (output_values_dims.size() != 1) { - IE_THROW() << layer->name << " Incorrect dimensions for output values. It must be N dimension tensor."; - } - if (output_indices_dims[0] != output_values_dims[0]) { - IE_THROW() << layer->name << " Mismatch of the first dimensions of output indices and values."; - } - SizeVector output_empty_rows_indicator_dims = layer->outData[OUTPUT_EMPTY_ROWS_INDICATOR_PORT]->getTensorDesc().getDims(); - if (output_empty_rows_indicator_dims.size() != 1) { - IE_THROW() << layer->name << " Incorrect dimensions for output empty rows indicator. It must be 1-D tensor."; - } - outMaxNumValues = output_indices_dims[0]; - if (outMaxNumValues < inMaxNumValues) { - IE_THROW() << layer->name << " The first dimension size of input indices can not be greater the first dimension of output indices."; - } - - // TODO: check that dense shape value is set - addConfig(layer, - {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32)}, - {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32)}); - } - catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - const float *input_indices_ptr = inputs[INPUT_INDICES_PORT]->cbuffer().as() + - inputs[INPUT_INDICES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const float *input_values_ptr = inputs[INPUT_VALUES_PORT]->cbuffer().as() + - inputs[INPUT_VALUES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const float *dense_shape_ptr = inputs[INPUT_DENSE_SHAPE_PORT]->cbuffer().as() + - inputs[INPUT_DENSE_SHAPE_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const float *default_value_ptr = inputs[INPUT_DEFAULT_VALUE_PORT]->cbuffer().as() + - inputs[INPUT_DEFAULT_VALUE_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - float default_value = default_value_ptr[0]; - float num_rows = dense_shape_ptr[0]; - float num_cols = dense_shape_ptr[1]; - - // compute actual number of values by searching out of range indice that serves as a marker - size_t in_actual_num_values 
= 0; - for (in_actual_num_values = 0; in_actual_num_values < inMaxNumValues; in_actual_num_values++) { - float indice_x = input_indices_ptr[2 * in_actual_num_values]; - float indice_y = input_indices_ptr[2 * in_actual_num_values + 1]; - if (indice_x < 0 || indice_y < 0 || indice_x >= num_rows || indice_y >= num_cols) break; - } - - // create auxiliary container for sorting - std::vector> indices_values(in_actual_num_values); - parallel_for(in_actual_num_values, [&](size_t i) { - float row = input_indices_ptr[2 * i]; - float col = input_indices_ptr[2 * i + 1]; - float value = input_values_ptr[i]; - std::array elem = { row, col, value }; - indices_values[i] = elem; - }); - - // sort values by row - parallel_sort(indices_values.begin(), indices_values.end(), - [](const std::array& first, const std::array& second) { - return first[0] < second[0]; - }); - - // unsplit indices and values - std::vector indices_with_sorted_rows(in_actual_num_values * 2); - std::vector values_for_sorted_rows(in_actual_num_values); - parallel_for(in_actual_num_values, [&](size_t i) { - auto elem = indices_values[i]; - indices_with_sorted_rows[i * 2] = elem[0]; - indices_with_sorted_rows[i * 2 + 1] = elem[1]; - values_for_sorted_rows[i] = elem[2]; - }); - - // compute start indice for each row and a number of values at each row - std::vector values_at_row(static_cast(num_rows)); - std::fill(values_at_row.begin(), values_at_row.end(), 0); - float prev_row_with_value = -1.0f; - unsigned int total_num_values = 0; - std::vector>::iterator curr_it, prev_it; - for (float row_ind = 0.0; row_ind < num_rows; row_ind = row_ind + 1.0f) { - curr_it = std::find_if(indices_values.begin(), indices_values.end(), - [row_ind](std::array elem) { return elem[0] == row_ind; }); - if (curr_it != indices_values.end()) { - if (prev_row_with_value != -1.0f) { - unsigned int num_values_at_prev_row = static_cast(std::distance(prev_it, curr_it)); - values_at_row[static_cast(prev_row_with_value)] = num_values_at_prev_row; - total_num_values += num_values_at_prev_row; - } - prev_row_with_value = row_ind; - prev_it = curr_it; - } else { - total_num_values++; - } - } - if (prev_row_with_value != -1.0) { - unsigned int num_values_at_prev_row = static_cast(std::distance(prev_it, indices_values.end())); - values_at_row[static_cast(prev_row_with_value)] = num_values_at_prev_row; - total_num_values += num_values_at_prev_row; - } - - // check that output buffer size is sufficient - if (outMaxNumValues < total_num_values) return GENERAL_ERROR; - - // create output indices - float *output_indices_ptr = outputs[OUTPUT_INDICES_PORT]->cbuffer().as() + - inputs[OUTPUT_INDICES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - float *output_values_ptr = outputs[OUTPUT_VALUES_PORT]->cbuffer().as() + - inputs[OUTPUT_VALUES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - float *output_empty_rows_indicator_ptr = outputs[OUTPUT_EMPTY_ROWS_INDICATOR_PORT]->cbuffer().as() + - inputs[OUTPUT_EMPTY_ROWS_INDICATOR_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - auto output_indices_size = outputs[OUTPUT_INDICES_PORT]->byteSize(); - memset(output_indices_ptr, 0, output_indices_size); - - auto output_values_size = outputs[OUTPUT_VALUES_PORT]->byteSize(); - memset(output_values_ptr, 0, output_values_size); - - auto output_empty_rows_indicator_size = outputs[OUTPUT_EMPTY_ROWS_INDICATOR_PORT]->byteSize(); - memset(output_empty_rows_indicator_ptr, 0, output_empty_rows_indicator_size); - - - unsigned int curr_pos_from_copy = 0; - 
unsigned int curr_pos_to_copy = 0; - for (int row_ind = 0; row_ind < static_cast(num_rows); row_ind++) { - unsigned int num_values_at_row = values_at_row[row_ind]; - if (num_values_at_row == 0) { - output_empty_rows_indicator_ptr[row_ind] = 1.0; - output_values_ptr[curr_pos_to_copy] = default_value; - output_indices_ptr[curr_pos_to_copy * 2] = static_cast(row_ind); - output_indices_ptr[curr_pos_to_copy * 2 + 1] = 0.0; - curr_pos_to_copy++; - } else { - output_empty_rows_indicator_ptr[row_ind] = 0.0; - std::copy(values_for_sorted_rows.begin() + curr_pos_from_copy, - values_for_sorted_rows.begin() + curr_pos_from_copy + num_values_at_row, - output_values_ptr + curr_pos_to_copy); - std::copy(indices_with_sorted_rows.begin() + 2 * curr_pos_from_copy, - indices_with_sorted_rows.begin() + 2 * curr_pos_from_copy + 2 * num_values_at_row, output_indices_ptr + curr_pos_to_copy * 2); - curr_pos_to_copy += num_values_at_row; - curr_pos_from_copy += num_values_at_row; - } - } - - // mark the end of output using (-1, -1) indice - if (total_num_values < outMaxNumValues) { - output_indices_ptr[total_num_values * 2] = -1.0; - output_indices_ptr[total_num_values * 2 + 1] = -1.0; - } - - return OK; - } - -private: - const size_t INPUT_INDICES_PORT = 0; - const size_t INPUT_VALUES_PORT = 1; - const size_t INPUT_DENSE_SHAPE_PORT = 2; - const size_t INPUT_DEFAULT_VALUE_PORT = 3; - const size_t OUTPUT_INDICES_PORT = 0; - const size_t OUTPUT_VALUES_PORT = 1; - const size_t OUTPUT_EMPTY_ROWS_INDICATOR_PORT = 2; - - size_t inMaxNumValues = 0; - size_t outMaxNumValues = 0; -}; - -REG_FACTORY_FOR(SparseFillEmptyRowsImpl, SparseFillEmptyRows); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp b/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp deleted file mode 100644 index 9ae02fb7f0c..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class SparseSegmentReduceImpl : public ExtLayerBase { -private: - // supported operations for the reduction - enum ReducedOp { sum, mean, sqrtn}; - -public: - explicit SparseSegmentReduceImpl(const CNNLayer* layer) { - try { - // check a number of input/output edges - if (layer->insData.size() != 3 || layer->outData.size() != 1) { - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - } - - // check operation by which it reduces - std::string reduce_mode = layer->type; - if (reduce_mode == "SparseSegmentSum") reduction_op = ReducedOp::sum; - else if (reduce_mode == "SparseSegmentMean") reduction_op = ReducedOp::mean; - else if (reduce_mode == "SparseSegmentSqrtN") reduction_op = ReducedOp::sqrtn; - else - IE_THROW() << layer->name << " Incorrect SparseSegmentReduce layer type!"; - - // check shapes of the second and third input tensors - input_indices_dims = layer->insData[INPUT_INDICES_PORT].lock()->getTensorDesc().getDims(); - if (input_indices_dims.size() != 1) { - IE_THROW() << layer->name << " Incorrect dimensions for input indices. 
It must be a one-dimensional tensor."; - } - input_segment_ids_dims = layer->insData[INPUT_SEGMENT_IDS_PORT].lock()->getTensorDesc().getDims(); - if (input_segment_ids_dims.size() != 1) { - IE_THROW() << layer->name << " Incorrect dimensions for input segment IDs. It must be a one-dimensional tensor."; - } - if (input_indices_dims[0] != input_segment_ids_dims[0]) { - IE_THROW() << layer->name << " Shapes for input indices and segment IDs must match."; - } - - // check shapes of output tensor - input_data_dims = layer->insData[INPUT_DATA_PORT].lock()->getTensorDesc().getDims(); - output_dims = layer->outData[OUTPUT_PORT]->getTensorDesc().getDims(); - if (output_dims.size() != input_data_dims.size()) { - IE_THROW() << layer->name << " Incorrect dimensions for output."; - } - if (output_dims[0] != input_segment_ids_dims[0]) { - IE_THROW() << layer->name << " Incorrect dimensions for output."; - } - for (size_t i = 1; i < output_dims.size(); i++) { - if (output_dims[i] != input_data_dims[i]) { - IE_THROW() << layer->name << " Incorrect dimensions for output."; - } - } - - // confugure layouts of input and output ports - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32) }, { DataConfigurator(ConfLayout::PLN, Precision::FP32) }); - } - catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - const float *input_data_ptr = inputs[INPUT_DATA_PORT]->cbuffer().as() + - inputs[INPUT_DATA_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const float *input_indices_ptr = inputs[INPUT_INDICES_PORT]->cbuffer().as() + - inputs[INPUT_INDICES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const float *input_segment_ids_ptr = inputs[INPUT_SEGMENT_IDS_PORT]->cbuffer().as() + - inputs[INPUT_SEGMENT_IDS_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - float *output_ptr = outputs[OUTPUT_PORT]->cbuffer().as() + - inputs[OUTPUT_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - // compute a number of elements in data slice - size_t num_indices = input_indices_dims[0]; - size_t num_slices = input_data_dims[0]; - size_t num_elements_in_slice = std::accumulate(input_data_dims.begin(), input_data_dims.end(), 1, std::multiplies()) / num_slices; - - // check that indices in a range [0; num_slices) - if (std::any_of(input_indices_ptr, input_indices_ptr + num_indices, - [num_slices](float idx) {return idx < 0.f || idx >= static_cast(num_slices);})) { - return GENERAL_ERROR; - } - - // check that segment IDs are sorted - for (size_t i = 1; i < num_indices; i++) { - if (input_segment_ids_ptr[i] < input_segment_ids_ptr[i - 1]) { - return GENERAL_ERROR; - } - } - - // compute start indices for segments in indices tensor - size_t num_segments = static_cast(input_segment_ids_ptr[num_indices - 1]) + 1; - std::vector segment_starts(num_segments); - int prev_segment_id = -1; - for (size_t i = 0; i < num_indices; i++) { - if (i > 0 && input_segment_ids_ptr[i] == input_segment_ids_ptr[i - 1]) { - continue; - } - int cur_segment_id = static_cast(input_segment_ids_ptr[i]); - for (int tmp_segment_ids = prev_segment_id + 1; tmp_segment_ids <= cur_segment_id; tmp_segment_ids++) { - segment_starts[tmp_segment_ids] = i; - } - prev_segment_id = cur_segment_id; - } - - // zero output buffer - std::memset(output_ptr, 0, output_dims[0] * 
num_elements_in_slice * sizeof(float)); - - // compute the result for each segment in parallel - parallel_for(num_segments, [&](size_t segment_id) { - float *segment_ptr = output_ptr + segment_id * num_elements_in_slice; - size_t start = segment_starts[segment_id]; - size_t end = (segment_id == (num_segments - 1)) ? num_indices : segment_starts[segment_id + 1]; - - // scatter data and reduce for one segment - for (size_t idx = start; idx < end; idx++) { - size_t indice = input_indices_ptr[idx]; - std::transform(segment_ptr, segment_ptr + num_elements_in_slice, - input_data_ptr + indice * num_elements_in_slice, - segment_ptr, std::plus()); - } - }); - - if (reduction_op == ReducedOp::mean) { - parallel_for(num_segments, [&](size_t segment_id) { - float *segment_ptr = output_ptr + segment_id * num_elements_in_slice; - size_t start = segment_starts[segment_id]; - size_t end = (segment_id == (num_segments - 1)) ? num_indices : segment_starts[segment_id + 1]; - float num_adds = static_cast(end - start); - if (num_adds > 0) { - std::transform(segment_ptr, segment_ptr + num_elements_in_slice, segment_ptr, - [num_adds](float elem) { return elem / num_adds; }); - } - }); - } - - if (reduction_op == ReducedOp::sqrtn) { - parallel_for(num_segments, [&](size_t segment_id) { - float *segment_ptr = output_ptr + segment_id * num_elements_in_slice; - size_t start = segment_starts[segment_id]; - size_t end = (segment_id == (num_segments - 1)) ? num_indices : segment_starts[segment_id + 1]; - float sqrtn = sqrtf(static_cast(end - start)); - if (sqrtn > 0) { - std::transform(segment_ptr, segment_ptr + num_elements_in_slice, segment_ptr, - [sqrtn](float elem) { return elem / sqrtn; }); - } - }); - } - - return OK; - } - -private: - const size_t INPUT_DATA_PORT = 0; - const size_t INPUT_INDICES_PORT = 1; - const size_t INPUT_SEGMENT_IDS_PORT = 2; - const size_t OUTPUT_PORT = 0; - - SizeVector input_data_dims; - SizeVector input_indices_dims; - SizeVector input_segment_ids_dims; - SizeVector output_dims; - - ReducedOp reduction_op; -}; - -REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentMean); -REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSqrtN); -REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSum); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp b/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp deleted file mode 100644 index 2e23cdc56df..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class SparseToDenseImpl : public ExtLayerBase { -public: - explicit SparseToDenseImpl(const CNNLayer* layer) { - try { - if ((layer->insData.size() != 3 && layer->insData.size() != 4) || layer->outData.size() != 1) { - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - } - if (layer->insData.size() == 4) { - with_default_value = true; - } - - // check dimensions of input tensors - SizeVector input_dense_shape_dims = layer->insData[INPUT_DENSE_SHAPE_PORT].lock()->getTensorDesc().getDims(); - if (input_dense_shape_dims.size() != 1 || input_dense_shape_dims[0] < 1) { - IE_THROW() << layer->name << " Incorrect dimensions for 
input dense shape. It must be 1D dimension tensor."; - } - dense_tensor_rank = input_dense_shape_dims[0]; - SizeVector input_indices_dims = layer->insData[INPUT_INDICES_PORT].lock()->getTensorDesc().getDims(); - if (input_indices_dims.size() != 2 || input_indices_dims[1] != dense_tensor_rank) { - IE_THROW() << layer->name << " Incorrect dimensions for input indices."; - } - SizeVector input_values_dims = layer->insData[INPUT_VALUES_PORT].lock()->getTensorDesc().getDims(); - if (input_values_dims.size() != 1 || input_values_dims[0] != input_indices_dims[0]) { - IE_THROW() << layer->name << " Incorrect dimensions for input values."; - } - if (with_default_value) { - SizeVector input_default_value_dims = layer->insData[INPUT_DEFAULT_VALUE_PORT].lock()->getTensorDesc().getDims(); - if (input_default_value_dims.size() != 0) { - IE_THROW() << layer->name << " Incorrect dimensions for input default value."; - } - } - input_num_values = input_values_dims[0]; - - // TODO: check that dense shape value is set - if (with_default_value) { - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN, Precision::I32), - DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN, Precision::I32) }, - { DataConfigurator(ConfLayout::PLN, Precision::I32) }); - } else { - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN, Precision::I32), - DataConfigurator(ConfLayout::PLN, Precision::I32) }, - { DataConfigurator(ConfLayout::PLN, Precision::I32) }); - } - } - catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - const int *input_indices_ptr = inputs[INPUT_INDICES_PORT]->cbuffer().as() + - inputs[INPUT_INDICES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const int *input_dense_shape_ptr = inputs[INPUT_DENSE_SHAPE_PORT]->cbuffer().as() + - inputs[INPUT_DENSE_SHAPE_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const int *input_values_ptr = inputs[INPUT_VALUES_PORT]->cbuffer().as() + - inputs[INPUT_VALUES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - int default_value = 0; - if (with_default_value) { - const int *input_default_value_ptr = inputs[INPUT_DEFAULT_VALUE_PORT]->cbuffer().as() + - inputs[INPUT_DEFAULT_VALUE_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - default_value = *input_default_value_ptr; - } - int *output_ptr = outputs[OUTPUT_PORT]->cbuffer().as() + - inputs[OUTPUT_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - size_t output_num_values = 1; - for (size_t ind = 0; ind < dense_tensor_rank; ind++) { - output_num_values *= input_dense_shape_ptr[ind]; - } - - // fill the output tensor with the default value - for (size_t ind = 0; ind < output_num_values; ind++) { - output_ptr[ind] = default_value; - } - - // walkthrough all indices and fill the output tensor with corresponding values - for (size_t ind = 0; ind < input_num_values; ind++) { - int value = input_values_ptr[ind]; - size_t placement = 0; - const int *tmp_indice_ptr = input_indices_ptr + ind * dense_tensor_rank; - size_t num_values_in_slice = output_num_values; - for (size_t subindice_ind = 0; subindice_ind < dense_tensor_rank; subindice_ind++) { - num_values_in_slice /= input_dense_shape_ptr[subindice_ind]; - size_t subindice = static_cast(tmp_indice_ptr[subindice_ind]); - if (subindice >= 
input_dense_shape_ptr[subindice_ind]) { - if (resp) { - std::string errorMsg = "Value of index is out of bound!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - placement += subindice * num_values_in_slice; - } - output_ptr[placement] = value; - } - - return OK; - } - -private: - const size_t INPUT_INDICES_PORT = 0; - const size_t INPUT_DENSE_SHAPE_PORT = 1; - const size_t INPUT_VALUES_PORT = 2; - const size_t INPUT_DEFAULT_VALUE_PORT = 3; - const size_t OUTPUT_PORT = 0; - - size_t dense_tensor_rank = 0; - size_t input_num_values = 0; - bool with_default_value = false; -}; - -REG_FACTORY_FOR(SparseToDenseImpl, SparseToDense); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp b/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp deleted file mode 100644 index b1e2a56afe5..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class ExperimentalSparseWeightedReduceImpl : public ExtLayerBase { -private: - // supported operations for the reduction - enum ReducedOp {sum}; - -public: - explicit ExperimentalSparseWeightedReduceImpl(const CNNLayer* layer) { - try { - if ((layer->insData.size() != 5 && layer->insData.size() != 6) || layer->outData.size() != 1) { - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - } - if (layer->insData.size() == 6) { - with_weights = true; - } - - // check operation by which it reduces - std::string reduce_mode = layer->type; - if (reduce_mode == "ExperimentalSparseWeightedSum") reduction_op = ReducedOp::sum; - else - IE_THROW() << layer->name << " Incorrect ExperimentalSparseWeightedReduce layer type!"; - - // check a precision of input tensors - input_indices_precision = layer->insData[INPUT_INDICES_PORT].lock()->getTensorDesc().getPrecision(); - input_values_precision = layer->insData[INPUT_VALUES_PORT].lock()->getTensorDesc().getPrecision(); - input_dense_shape_precision = layer->insData[INPUT_DENSE_SHAPE_PORT].lock()->getTensorDesc().getPrecision(); - input_parameters_table_precision = layer->insData[INPUT_PARAMETERS_TABLE_PORT].lock()->getTensorDesc().getPrecision(); - input_default_value_precision = layer->insData[INPUT_DEFAULT_VALUE_PORT].lock()->getTensorDesc().getPrecision(); - - bool are_other_precisions_valid = (input_indices_precision == Precision::I32 && - input_values_precision == Precision::I32 && - input_dense_shape_precision == Precision::I32); - if (are_other_precisions_valid == false) { - IE_THROW() << layer->name << " Incorrect precision of the input tensors."; - } - - if (input_parameters_table_precision != Precision::FP32) { - IE_THROW() << layer->name - << " Incorrect precision of the input parameters table values. Only FP32 is supported!"; - } - - if (input_default_value_precision != Precision::I32) { - IE_THROW() << layer->name - << " Incorrect precision of the input default value. 
Only I32 is supported!"; - } - - if (with_weights) { - Precision input_weights_precision = layer->insData[INPUT_WEIGHTS_PORT].lock()->getTensorDesc().getPrecision(); - if (input_weights_precision != Precision::FP32) { - IE_THROW() << layer->name - << " Incorrect precision of the input weights values. Only FP32 is supported!"; - } - } - - // check dimensions of input tensors - SizeVector input_indices_dims = layer->insData[INPUT_INDICES_PORT].lock()->getTensorDesc().getDims(); - if (input_indices_dims.size() != 2 || input_indices_dims[1] != 2) { - IE_THROW() << layer->name - << " Incorrect dimensions for input indices. It must be Nx2 dimension tensor."; - } - SizeVector input_values_dims = layer->insData[INPUT_VALUES_PORT].lock()->getTensorDesc().getDims(); - if (input_values_dims.size() != 1) { - IE_THROW() << layer->name - << " Incorrect dimensions for input values. It must be N dimension tensor."; - } - if (input_indices_dims[0] != input_values_dims[0]) { - IE_THROW() << layer->name - << " Mismatch of the first dimensions of input indices and values."; - } - SizeVector input_dense_shape_dims = layer->insData[INPUT_DENSE_SHAPE_PORT].lock()->getTensorDesc().getDims(); - if (input_dense_shape_dims.size() != 1 || input_dense_shape_dims[0] != 2) { - IE_THROW() << layer->name - << " Incorrect dimensions for input dense shape."; - } - SizeVector input_parameters_table_dims = layer->insData[INPUT_PARAMETERS_TABLE_PORT].lock()->getTensorDesc().getDims(); - if (input_parameters_table_dims.size() < 2) { - IE_THROW() << layer->name - << " Incorrect dimensions for input parameters table."; - } - SizeVector input_default_value_dims = layer->insData[INPUT_DEFAULT_VALUE_PORT].lock()->getTensorDesc().getDims(); - if (input_default_value_dims.size() != 0) { - IE_THROW() << layer->name - << " Incorrect dimensions for input default value."; - } - if (with_weights) { - SizeVector input_weights_dims = layer->insData[INPUT_WEIGHTS_PORT].lock()->getTensorDesc().getDims(); - if (input_weights_dims.size() != 1) { - IE_THROW() << layer->name - << " Incorrect dimensions for input weights. 
It must be N dimension tensor."; - } - if (input_weights_dims[0] != input_values_dims[0]) { - IE_THROW() << layer->name - << " Mismatch of the first dimensions of input weights and values."; - } - } - input_num_values = input_indices_dims[0]; - - // check dimensions of output tensors - SizeVector output_dims = layer->outData[OUTPUT_PORT]->getTensorDesc().getDims(); - if (output_dims.size() != input_parameters_table_dims.size()) { - IE_THROW() << layer->name << " Incorrect dimensions for the output tensor."; - } - output_batch_size = output_dims[0]; - output_elem_size = 1; - for (size_t ind = 1; ind < input_parameters_table_dims.size(); ind++) { - output_elem_size *= input_parameters_table_dims[ind]; - } - - // TODO: check that dense shape value is set - if (with_weights) { - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN, Precision::I32), - DataConfigurator(ConfLayout::PLN, Precision::I32), - DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::I32), - DataConfigurator(ConfLayout::PLN, Precision::FP32) }, { DataConfigurator(ConfLayout::PLN, Precision::FP32) }); - } else { - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN, Precision::I32), - DataConfigurator(ConfLayout::PLN, Precision::I32), DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::I32) }, { DataConfigurator(ConfLayout::PLN, Precision::FP32) }); - } - } - catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - const int *input_indices_i32_ptr = inputs[INPUT_INDICES_PORT]->cbuffer().as() + - inputs[INPUT_INDICES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const int *input_values_i32_ptr = inputs[INPUT_VALUES_PORT]->cbuffer().as() + - inputs[INPUT_VALUES_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const float *input_parameters_table_ptr = inputs[INPUT_PARAMETERS_TABLE_PORT]->cbuffer().as() + - inputs[INPUT_PARAMETERS_TABLE_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - const int *input_default_value_ptr = inputs[INPUT_DEFAULT_VALUE_PORT]->cbuffer().as() + - inputs[INPUT_DEFAULT_VALUE_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - size_t input_default_value = static_cast(*input_default_value_ptr); - const float *input_weights_ptr = nullptr; - if (with_weights) { - input_weights_ptr = inputs[INPUT_WEIGHTS_PORT]->cbuffer().as() + - inputs[INPUT_WEIGHTS_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - } - float *output_ptr = outputs[OUTPUT_PORT]->cbuffer().as() + - inputs[OUTPUT_PORT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - // fill the output tensor with default values - for (size_t batch_ind = 0; batch_ind < output_batch_size; batch_ind++) { - const float *param_elem_ptr = input_parameters_table_ptr + input_default_value * output_elem_size; - float *output_elem_ptr = output_ptr + batch_ind * output_elem_size; - for (size_t ind = 0; ind < output_elem_size; ind++) { - output_elem_ptr[ind] = param_elem_ptr[ind]; - } - } - - // initialize a vector with segment number values - std::vector segment_nums(output_batch_size, 0.0f); - - // compute the output tensor - int prev_indice_x = -1; - for (size_t curr_value_ind = 0; curr_value_ind < input_num_values; curr_value_ind++) { - int indice_x = 0; - size_t value = 0; - 
indice_x = input_indices_i32_ptr[2 * curr_value_ind]; - value = static_cast(input_values_i32_ptr[curr_value_ind]); - const float *param_elem_ptr = input_parameters_table_ptr + value * output_elem_size; - float *output_elem_ptr = output_ptr + indice_x * output_elem_size; - if (prev_indice_x != indice_x) { - // zero a slice - prev_indice_x = indice_x; - for (size_t ind = 0; ind < output_elem_size; ind++) { - output_elem_ptr[ind] = 0.0f; - } - } - float weight = 1.0f; - if (with_weights) { - weight = input_weights_ptr[curr_value_ind]; - } - segment_nums[indice_x] += weight; - for (size_t ind = 0; ind < output_elem_size; ind++) { - output_elem_ptr[ind] += param_elem_ptr[ind] * weight; - } - } - - return OK; - } - -private: - const size_t INPUT_INDICES_PORT = 0; - const size_t INPUT_VALUES_PORT = 1; - const size_t INPUT_DENSE_SHAPE_PORT = 2; - const size_t INPUT_PARAMETERS_TABLE_PORT = 3; - const size_t INPUT_DEFAULT_VALUE_PORT = 4; - const size_t INPUT_WEIGHTS_PORT = 5; - const size_t OUTPUT_PORT = 0; - - size_t input_num_values = 0; - size_t output_batch_size = 0; - size_t output_elem_size = 0; - - ReducedOp reduction_op; - bool with_weights = false; - - Precision input_indices_precision; - Precision input_values_precision; - Precision input_dense_shape_precision; - Precision input_parameters_table_precision; - Precision input_default_value_precision; -}; - -REG_FACTORY_FOR(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp b/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp deleted file mode 100644 index f334e371b21..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include "ie_parallel.hpp" -#include "common/cpu_memcpy.h" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class SqueezeImpl: public ExtLayerBase { -public: - explicit SqueezeImpl(const CNNLayer* layer) { - try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - - if (layer->insData.size() != 1 && layer->insData.size() != 2) - IE_THROW() << layer->name << " Incorrect number of input edges!"; - - SizeVector data_dims = layer->insData[0].lock()->getTensorDesc().getDims(); - SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims(); - if (data_dims.size() < dst_dims.size()) - IE_THROW() << layer->name << " Incorrect number of input/output dimensions!"; - - if (layer->insData.size() == 1) - addConfig(layer, { { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } }); - else - addConfig(layer, { { ConfLayout::PLN, false, 0 }, { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } }); - - // WA to enable the implementation only for equal input and output precisions - confs[0].inConfs[0].desc.setPrecision(confs[0].outConfs[0].desc.getPrecision()); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - const uint8_t *src = inputs[0]->cbuffer().as() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding()*inputs[0]->element_size(); - uint8_t* dst = outputs[0]->cbuffer().as() + 
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding()*outputs[0]->element_size(); - - if (src != dst) { - size_t srcSize = inputs[0]->byteSize(); - size_t dstSize = outputs[0]->byteSize(); - cpu_memcpy_s(dst, dstSize, src, srcSize); - } - - return OK; - } -}; - -REG_FACTORY_FOR(SqueezeImpl, Squeeze); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/topk.cpp b/inference-engine/src/mkldnn_plugin/nodes/topk.cpp index 7da9f3521b8..31cd81b410d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/topk.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/topk.cpp @@ -11,7 +11,11 @@ #include #include #include + #include "ie_parallel.hpp" +#include +#include "common/tensor_desc_creator.h" +#include "utils/general_utils.h" #if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) #include #endif @@ -20,51 +24,53 @@ namespace InferenceEngine { namespace Extensions { namespace Cpu { +using MKLDNNPlugin::TensorDescCreatorTypes; + class TopKImpl: public ExtLayerBase { public: - explicit TopKImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 2) - IE_THROW() << layer->name << " Incorrect number of input edges!"; - - if (layer->outData.size() != 1 && layer->outData.size() != 2) - IE_THROW() << layer->name << " Incorrect number of output edges!"; - - if (layer->insData[TOPK_K].lock()->getTensorDesc().getDims().size() > 1) - IE_THROW() << layer->name << " TopKImpl - Index vector should be 1 dimension"; - - SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims(); - SizeVector src_data_dims = layer->insData[TOPK_DATA].lock()->getTensorDesc().getDims(); - if (src_data_dims.size() != dst_dims.size()) - IE_THROW() << layer->name << " TopKImpl - Incorrect input/output tensor dimension sizes"; - - if (layer->outData.size() == 2) { - SizeVector dst_idx_dims = layer->outData[TOPK_INDEX]->getTensorDesc().getDims(); - if (dst_dims.size() != dst_idx_dims.size()) - IE_THROW() << layer->name << " Incorrect output tensor dimension sizes"; - - for (size_t i = 0; i < dst_dims.size(); i++) { - if (dst_dims[i] != dst_idx_dims[i]) - IE_THROW() << layer->name << " Input/output tensor dimension mismatch"; - } + auto topKOp = ngraph::as_type_ptr(op); + if (!topKOp) { + errorMessage = "Node is not an instance of the TopK from the operations set v1 or v3"; + return false; } + if (topKOp->get_mode() != ngraph::op::TopKMode::MAX && + topKOp->get_mode() != ngraph::op::TopKMode::MIN) { + errorMessage = "Unsupported mode."; + return false; + } + if (!MKLDNNPlugin::one_of(topKOp->get_sort_type(), ngraph::op::TopKSortType::NONE, + ngraph::op::TopKSortType::SORT_VALUES, + ngraph::op::TopKSortType::SORT_INDICES)) { + errorMessage = "Unsupported sort type."; + return false; + } + } catch (...) 
{ + return false; + } + return true; + } - src_dims = layer->insData[TOPK_DATA].lock()->getTensorDesc().getDims(); - int axis_ = layer->GetParamAsInt("axis", -1); - if (axis_ < 0) - axis_ += src_dims.size(); + explicit TopKImpl(const std::shared_ptr& op) { + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + auto topK1Op = ngraph::as_type_ptr(op); - axis = static_cast(axis_); + SizeVector dstDims = topK1Op->get_output_shape(TOPK_VALUE); + src_dims = topK1Op->get_input_shape(TOPK_DATA); - if (src_dims.size() < (1 + axis)) - IE_THROW() << layer->name << " Incorrect input parameters dimensions and axis number!"; + axis = topK1Op->get_axis(); - if (layer->GetParamAsString("mode", "max") == "max") + if (topK1Op->get_mode() == ngraph::op::TopKMode::MAX) mode_max = true; else mode_max = false; - if (layer->GetParamAsString("sort", "index") == "value") + if (topK1Op->get_sort_type() == ngraph::op::TopKSortType::SORT_VALUES) sort_value = true; else sort_value = false; @@ -77,33 +83,27 @@ public: for (size_t i = 0; i < axis; i++) { axis_step *= src_dims[i]; - if (src_data_dims[i] != dst_dims[i]) - IE_THROW() << layer->name << " Input/output tensor dimension mismatch"; } axis_dim = src_dims[axis]; for (size_t i = (axis + 1); i < src_dims.size(); i++) { axis_stride *= src_dims[i]; - if (src_data_dims[i] != dst_dims[i]) - IE_THROW() << layer->name << " Input/output tensor dimension mismatch"; } dim = static_cast(src_dims[axis]); before_num = count(src_dims, 0, axis); - if (layer->outData.size() == 1) { - addConfig(layer, { DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::I32) }, - { DataConfigurator(ConfLayout::PLN) }); + if (topK1Op->get_output_size() == 1) { + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}}); } else { - addConfig(layer, { DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::I32) }, - { DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN) }); - - // TODO: WA... While ICNNNetwork has no clear rule to fill tensor precision - // it use precision of parent layer. So each output tensor Data object has - // precision of producing layer. For TopK that is not true. Second output is - // integer tensor. Will change it for corresponding output desc. 
- confs.back().outConfs[1].desc.setPrecision(Precision::I32); + addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{TensorDescCreatorTypes::ncsp, Precision::FP32}, + {TensorDescCreatorTypes::ncsp, Precision::I32}}); } } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } @@ -455,15 +455,15 @@ public: if (outputs.size() == 1) { if (outputs[0]->getTensorDesc().getPrecision() == Precision::FP32) { - dst_data = outputs[0]->cbuffer().as() + + dst_data = outputs[0]->buffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); } else { - dst_idx = outputs[0]->cbuffer().as() + + dst_idx = outputs[0]->buffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); } - SizeVector dst_dims = outputs[0]->getTensorDesc().getDims(); + SizeVector dstDims = outputs[0]->getTensorDesc().getDims(); - if (dst_dims[axis] != static_cast(src_k)) { + if (dstDims[axis] != static_cast(src_k)) { if (resp) { std::string errorMsg = "Output tensor dimension mismatch"; errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); @@ -471,11 +471,11 @@ public: return PARAMETER_MISMATCH; } } else if (outputs.size() == 2) { - dst_data = outputs[TOPK_VALUE]->cbuffer().as() + + dst_data = outputs[TOPK_VALUE]->buffer().as() + outputs[TOPK_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding(); SizeVector dst_data_dims = outputs[TOPK_VALUE]->getTensorDesc().getDims(); - dst_idx = outputs[TOPK_INDEX]->cbuffer().as() + + dst_idx = outputs[TOPK_INDEX]->buffer().as() + outputs[TOPK_INDEX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); SizeVector dst_idx_dims = outputs[TOPK_INDEX]->getTensorDesc().getDims(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp index f8f51ea33ad..e4203ef564f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp @@ -7,7 +7,9 @@ #include #include #include "common/cpu_memcpy.h" +#include +using MKLDNNPlugin::TensorDescCreatorTypes; namespace InferenceEngine { namespace Extensions { @@ -26,21 +28,42 @@ private: const int OUTPUT_ROIS {0}; -public: - explicit ExperimentalDetectronTopKROIsImpl(const CNNLayer* layer) { + bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (layer->insData.size() != 2 || layer->outData.empty()) - IE_THROW() << "Incorrect number of input/output edges!"; + const auto topKROI = std::dynamic_pointer_cast(op); + if (!topKROI) { + errorMessage = "Only opset6 ExperimentalDetectronTopKROIs operation is supported"; + return false; + } + } catch (...) 
{
+ return false;
+ }
+ return true;
+ }
- if (layer->insData[INPUT_ROIS].lock()->getTensorDesc().getDims().size() != 2 ||
- layer->insData[INPUT_PROBS].lock()->getTensorDesc().getDims().size() != 1)
- IE_THROW() << "Unsupported shape of input blobs!";
+ std::string errorPrefix;
- max_rois_num_ = layer->GetParamAsInt("max_rois", 0);
+public:
+ explicit ExperimentalDetectronTopKROIsImpl(const std::shared_ptr& op) {
+ try {
+ std::string errorMessage;
+ if (!isSupportedOperation(op, errorMessage)) {
+ IE_THROW(NotImplemented) << errorMessage;
+ }
- addConfig(layer,
- {DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32)},
- {DataConfigurator(ConfLayout::PLN, Precision::FP32)});
+ errorPrefix = "ExperimentalDetectronTopKROIs layer with name '" + op->get_friendly_name() + "'";
+ const auto topKROI = std::dynamic_pointer_cast(op);
+ if (op->get_input_size() != 2 || op->get_output_size() != 1)
+ IE_THROW() << errorPrefix << " has incorrect number of input/output edges!";
+
+ if (op->get_input_shape(INPUT_ROIS).size() != 2 || op->get_input_shape(INPUT_PROBS).size() != 1)
+ IE_THROW() << errorPrefix << " has unsupported input shape";
+
+ max_rois_num_ = topKROI->get_max_rois();
+
+ addConfig(op, {{TensorDescCreatorTypes::ncsp, Precision::FP32},
+ {TensorDescCreatorTypes::ncsp, Precision::FP32}},
+ {{TensorDescCreatorTypes::ncsp, Precision::FP32}});
 } catch (InferenceEngine::Exception &ex) {
 errorMsg = ex.what();
 }
diff --git a/inference-engine/src/mkldnn_plugin/nodes/unique.cpp b/inference-engine/src/mkldnn_plugin/nodes/unique.cpp
deleted file mode 100644
index f67a22f49ec..00000000000
--- a/inference-engine/src/mkldnn_plugin/nodes/unique.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "base.hpp"
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "ie_parallel.hpp"
-
-namespace InferenceEngine {
-namespace Extensions {
-namespace Cpu {
-
-class UniqueImpl : public ExtLayerBase {
-public:
- explicit UniqueImpl(const CNNLayer* layer) {
- try {
- // check number of inputs and outputs
- if (layer->insData.size() != 1 || layer->outData.size() < 1 || layer->outData.size() > 3) {
- IE_THROW() << layer->name << " Incorrect number of input/output edges!";
- }
-
- // check precision of tensors
- Precision input_indices_precision = layer->insData[0].lock()->getTensorDesc().getPrecision();
- if (input_indices_precision != Precision::FP32) {
- IE_THROW() << layer->name << " Incorrect input precision.
Only FP32 is supported!"; - } - - // check attributes - sorted = layer->GetParamAsBool("sorted"); - return_inverse = layer->GetParamAsBool("return_inverse"); - return_counts = layer->GetParamAsBool("return_counts"); - - // check that a real number of outputs matches one claimed by attributes - size_t claimed_num_outputs = 1; - if (return_inverse) { - claimed_num_outputs++; - } - if (return_counts) { - claimed_num_outputs++; - } - if (layer->outData.size() != claimed_num_outputs) { - IE_THROW() << layer->name << " A number of outputs claimed by attributes does not match a real number of outputs!"; - } - - // check dimensions of input tensors - SizeVector input_dims = layer->insData[0].lock()->getTensorDesc().getDims(); - if (input_dims.size() != 1) { - IE_THROW() << layer->name << " Input must be 1-D tensor."; - } - num_elements = input_dims[0]; - - // check dimensions of output tensors and its precisions - size_t cur_output_port = 0; - SizeVector output_uniques_dims = layer->outData[cur_output_port]->getTensorDesc().getDims(); - if (output_uniques_dims.size() != 1 || output_uniques_dims[0] != num_elements) { - IE_THROW() << layer->name << " Incorrect dimensions for output tensor of unique elements."; - } - if (return_inverse) { - cur_output_port++; - SizeVector output_indices_dims = layer->outData[cur_output_port]->getTensorDesc().getDims(); - if (output_indices_dims.size() != 1 || output_indices_dims[0] != num_elements) { - IE_THROW() << layer->name << " Incorrect dimensions for output tensor of indices."; - } - } - if (return_counts) { - cur_output_port++; - SizeVector output_counts_dims = layer->outData[cur_output_port]->getTensorDesc().getDims(); - if (output_counts_dims.size() != 1 || output_counts_dims[0] != num_elements) { - IE_THROW() << layer->name << " Incorrect dimensions for output tensor of counts."; - } - } - - // add a layer configuration - if (layer->outData.size() == 1) { - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::FP32) }, - { DataConfigurator(ConfLayout::PLN, Precision::FP32) }); - } else if (layer->outData.size() == 2) { - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::FP32) }, - { DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32) }); - } else if (layer->outData.size() == 3) { - addConfig(layer, - { DataConfigurator(ConfLayout::PLN, Precision::FP32) }, { DataConfigurator(ConfLayout::PLN, Precision::FP32), - DataConfigurator(ConfLayout::PLN, Precision::FP32), DataConfigurator(ConfLayout::PLN, Precision::FP32) }); - } - } - catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - const float *input_ptr = inputs[0]->cbuffer().as() + - inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - size_t cur_output_port = 0; - float *output_uniques_ptr = outputs[cur_output_port]->cbuffer().as() + - outputs[cur_output_port]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - float *output_indices_ptr = nullptr; - if (return_inverse) { - cur_output_port++; - output_indices_ptr = outputs[cur_output_port]->cbuffer().as() + - outputs[cur_output_port]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - } - float *output_counts_ptr = nullptr; - if (return_counts) { - cur_output_port++; - output_counts_ptr = outputs[cur_output_port]->cbuffer().as() + - outputs[cur_output_port]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - } - - // create a 
copy since input can be changed by sorting - std::vector input_copy(num_elements); - std::copy(input_ptr, input_ptr + num_elements, input_copy.begin()); - - // sort elements in the input copy - if (sorted) { - parallel_sort(input_copy.begin(), input_copy.end(), std::less()); - } - - // walk through elements and save them along with its indice and occurences - std::unordered_map indices; - for (size_t i = 0, num_unique_elements = 0; i < num_elements; i++) { - auto it = indices.find(input_copy[i]); - if (it == indices.end()) { - indices.insert(std::make_pair(input_copy[i], static_cast(num_unique_elements))); - output_uniques_ptr[num_unique_elements] = input_copy[i]; - if (return_inverse && !sorted) { - output_indices_ptr[i] = static_cast(num_unique_elements); - } - if (return_counts) { - output_counts_ptr[num_unique_elements] = 1.0f; - } - num_unique_elements++; - } else { - if (return_inverse && !sorted) { - output_indices_ptr[i] = it->second; - } - if (return_counts) { - output_counts_ptr[static_cast(it->second)] += 1.0f; - } - } - } - - // compute indices individually when unique elements are known - if (sorted && return_inverse) { - for (size_t i = 0; i < num_elements; i++) { - auto it = indices.find(input_ptr[i]); - output_indices_ptr[i] = it->second; - } - } - - // fill a tail with the latest unique element used as an end mark - size_t num_unique_elements = indices.size(); - if ((num_elements - num_unique_elements) > 0) { - std::fill(output_uniques_ptr + num_unique_elements, - output_uniques_ptr + num_elements, - output_uniques_ptr[num_unique_elements - 1]); - } - - // fill a tail for output buffer with counts - if (return_counts && (num_elements - num_unique_elements) > 0) { - std::fill(output_counts_ptr + num_unique_elements, - output_counts_ptr + num_elements, 0.f); - } - - return OK; - } - -private: - // attributes - bool sorted; - bool return_inverse; - bool return_counts; - - size_t num_elements = 0; -}; - -REG_FACTORY_FOR(UniqueImpl, Unique); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp b/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp deleted file mode 100644 index 308542a702a..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "base.hpp" - -#include -#include -#include -#include -#include "ie_parallel.hpp" -#include "common/cpu_memcpy.h" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class UnsqueezeImpl: public ExtLayerBase { -public: - explicit UnsqueezeImpl(const CNNLayer* layer) { - try { - if (layer->insData.empty() || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; - - if (layer->insData.size() != 1 && layer->insData.size() != 2) - IE_THROW() << layer->name << " Incorrect number of input edges!"; - - if (layer->insData.size() == 1) - addConfig(layer, { { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } }); - else - addConfig(layer, { { ConfLayout::PLN, false, 0 }, { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } }); - - // WA to enable the implementation only for equal input and output precisions - confs[0].inConfs[0].desc.setPrecision(confs[0].outConfs[0].desc.getPrecision()); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, 
std::vector& outputs, ResponseDesc *resp) noexcept override {
- const uint8_t *src = inputs[0]->cbuffer().as() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding()*inputs[0]->element_size();
- uint8_t* dst = outputs[0]->cbuffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding()*outputs[0]->element_size();
-
- if (src != dst) {
- size_t srcSize = inputs[0]->byteSize();
- size_t dstSize = outputs[0]->byteSize();
- cpu_memcpy_s(dst, dstSize, src, srcSize);
- }
-
- return OK;
- }
-};
-
-REG_FACTORY_FOR(UnsqueezeImpl, Unsqueeze);
-
-} // namespace Cpu
-} // namespace Extensions
-} // namespace InferenceEngine
diff --git a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp
new file mode 100644
index 00000000000..0754e346a6e
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp
@@ -0,0 +1,94 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+
+namespace MKLDNNPlugin {
+
+/**
+* @brief Returns dims normalized to the requested rank, with the missing leading dimensions filled with ones
+* Example: dims = {2, 3, 5}; ndims = 5; result = {1, 1, 2, 3, 5}
+* @param dims
+* shape to normalize
+* @param ndims
+* rank of resulting shape
+* @return normalized vector
+*/
+inline std::vector<size_t> getNormalizedDimsBySize(const InferenceEngine::SizeVector &dims, size_t ndims) {
+ if (dims.size() >= ndims)
+ return dims;
+
+ std::vector<size_t> normalizedDims = dims;
+ for (size_t i = 0; i < (ndims - dims.size()); i++) {
+ normalizedDims.insert(normalizedDims.begin(), 1);
+ }
+ return normalizedDims;
+}
+
+/**
+* @brief Checks that secondInputDims is unidirectionally broadcastable, per tensor or per channel, to firstInputDims
+* @param firstInputDims
+* shape to which the second shape should be broadcastable
+* @param secondInputDims
+* shape that should be broadcastable
+* @return true if broadcastable, false otherwise.
+*/ +inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims) { + if (secondInputDims.size() > firstInputDims.size()) + return false; + if (std::accumulate(secondInputDims.begin(), secondInputDims.end(), 1, std::multiplies()) == 1) + return true; + + std::vector normalizedSecondInputDims = getNormalizedDimsBySize(secondInputDims, firstInputDims.size()); + for (size_t i = 0; i < normalizedSecondInputDims.size(); i++) { + if ((i == 1 && normalizedSecondInputDims[i] != firstInputDims[1]) || (i != 1 && normalizedSecondInputDims[i] != 1)) + return false; + } + return true; +} + +inline bool isEmptyTensorDesc(const InferenceEngine::TensorDesc &td) { + const auto dims = td.getDims(); + return std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } ); +} + +/** +* @brief Return precision to which given precision must be converted to be supported in plug-in +* @param precision +* precision for convert +* @return plug-in supported precision or UNSPECIFIED if precision unsupported +*/ +inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine::Precision precision) { + switch (precision) { + case InferenceEngine::Precision::U8: + case InferenceEngine::Precision::I8: + case InferenceEngine::Precision::I32: + case InferenceEngine::Precision::BF16: + case InferenceEngine::Precision::FP32: { + break; + } + case InferenceEngine::Precision::BOOL: { + precision = InferenceEngine::Precision::U8; + break; + } + case InferenceEngine::Precision::U16: + case InferenceEngine::Precision::I16: + case InferenceEngine::Precision::I64: + case InferenceEngine::Precision::U64: { + precision = InferenceEngine::Precision::I32; + break; + } + case InferenceEngine::Precision::FP16: { + precision = InferenceEngine::Precision::FP32; + break; + } + default: { + precision = InferenceEngine::Precision::UNSPECIFIED; + } + } + return precision; +} + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/general_utils.h b/inference-engine/src/mkldnn_plugin/utils/general_utils.h index d73a6737f30..952bf43dbf5 100644 --- a/inference-engine/src/mkldnn_plugin/utils/general_utils.h +++ b/inference-engine/src/mkldnn_plugin/utils/general_utils.h @@ -5,6 +5,7 @@ #pragma once #include +#include namespace MKLDNNPlugin { @@ -39,5 +40,34 @@ constexpr inline bool implication(bool cause, bool cond) { return !cause || !!cond; } +inline std::string getExceptionDescWithoutStatus(const InferenceEngine::Exception& ex) { + std::string desc = ex.what(); + IE_SUPPRESS_DEPRECATED_START + if (ex.getStatus() != 0) { + size_t pos = desc.find("]"); + if (pos != std::string::npos) { + if (desc.size() == pos + 1) { + desc.erase(0, pos + 1); + } else { + desc.erase(0, pos + 2); + } + } + } + IE_SUPPRESS_DEPRECATED_END + + return desc; +} + +template +std::string vec2str(const std::vector &vec) { + if (!vec.empty()) { + std::ostringstream result; + result << "("; + std::copy(vec.begin(), vec.end() - 1, std::ostream_iterator(result, ".")); + result << vec.back() << ")"; + return result.str(); + } + return std::string("()"); +} } // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp new file mode 100644 index 00000000000..d4ea7a2088b --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2021 Intel Corporation +// 
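// A rough usage sketch (outside the patch) for the helpers introduced above in cpu_utils.hpp and
// general_utils.h; the include paths, the sample shapes, and the results noted in the comments are
// assumptions made for illustration, not output produced by this code.
#include <iostream>
#include "utils/cpu_utils.hpp"     // assumed plugin-relative path
#include "utils/general_utils.h"   // assumed plugin-relative path

void cpuUtilsSketch() {
    using namespace MKLDNNPlugin;
    InferenceEngine::SizeVector activations{1, 20, 128, 128};   // NCHW-like first input
    InferenceEngine::SizeVector perChannel{20, 1, 1};           // candidate second input

    // {20, 1, 1} is padded with leading ones up to rank 4 -> {1, 20, 1, 1}
    auto normalized = getNormalizedDimsBySize(perChannel, activations.size());

    // true: after normalization the only non-unit dimension is the channel axis (index 1)
    bool broadcastable = isPerTensorOrPerChannelBroadcastable(activations, perChannel);

    // I64 is not handled natively by the plugin, so it is normalized to I32
    auto prc = normalizeToSupportedPrecision(InferenceEngine::Precision::I64);

    std::cout << vec2str(normalized) << " " << broadcastable << " " << prc.name() << std::endl;
}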
SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "transformations/rt_info/primitives_priority_attribute.hpp" + +namespace MKLDNNPlugin { + +inline std::string getRTInfoValue(const std::map>& rtInfo, std::string paramName) { + auto it = rtInfo.find(paramName); + if (it != rtInfo.end()) { + auto value = std::dynamic_pointer_cast>(it->second); + return value->get(); + } else { + return ""; + } +}; + +inline std::string getPrimitivesPriorityValue(const std::shared_ptr &node) { + const auto &rtInfo = node->get_rt_info(); + using PrimitivesPriorityWraper = ngraph::VariantWrapper; + + if (!rtInfo.count(PrimitivesPriorityWraper::type_info.name)) return ""; + + const auto &attr = rtInfo.at(PrimitivesPriorityWraper::type_info.name); + ngraph::PrimitivesPriority pp = ngraph::as_type_ptr(attr)->get(); + return pp.getPrimitivesPriority(); +} + +template +inline const std::shared_ptr getNgraphOpAs(const std::shared_ptr& op) { + auto typedOp = ngraph::as_type_ptr(op); + if (!typedOp) + IE_THROW() << "Can't get ngraph node " << op->get_type_name() << " with name " << op->get_friendly_name(); + return typedOp; +} + +} // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/bf16_network_restoring.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/bf16_network_restoring.cpp index 57b1d99728c..6f41e879e7a 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/bf16_network_restoring.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/bf16_network_restoring.cpp @@ -195,6 +195,8 @@ protected: }; TEST_P(BF16NetworkRestore1, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/concat_in_place.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/concat_in_place.cpp index 242c09e98ff..6e5f1161d4c 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/concat_in_place.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/concat_in_place.cpp @@ -128,13 +128,15 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["CONV_2"] = "BF16"; } }; TEST_P(Concat_in_place, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_add.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_add.cpp index bcbe3d4d7a3..7ba4ff64729 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_add.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_add.cpp @@ -111,11 +111,13 @@ protected: // STAGE3: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["Elt_sum"] = "BF16"; + expectedPrecisions["Elt_sum"] = "ndef"; } }; TEST_P(ConvAdd, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_conv.cpp index d18cc7ca6f2..8695fdc3e53 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_conv.cpp +++ 
b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_conv.cpp @@ -98,13 +98,15 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["CONV_2"] = "BF16"; } }; TEST_P(ConvConv, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_dwconv_relu.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_dwconv_relu.cpp index 9d6c7180d32..0f6350b5717 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_dwconv_relu.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_dwconv_relu.cpp @@ -117,13 +117,15 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["RELU"] = "ndef"; } }; TEST_P(ConvDWConvReLU, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_eltwise_depthwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_eltwise_depthwise.cpp index 83fc049b678..12b04746c63 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_eltwise_depthwise.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_eltwise_depthwise.cpp @@ -222,6 +222,8 @@ public: }; TEST_P(ConvEltwiseDepthwise, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + Run_test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_relu_pool_conv_relu_pool.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_relu_pool_conv_relu_pool.cpp index 438510c3eff..536e92a142b 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_relu_pool_conv_relu_pool.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_relu_pool_conv_relu_pool.cpp @@ -162,7 +162,7 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["Convolution_1"] = "FP32"; + expectedPrecisions["Convolution_1"] = "BF16"; expectedPrecisions["ReLU_1"] = "ndef"; expectedPrecisions["AvgPool_1"] = "BF16"; expectedPrecisions["Convolution_2"] = "BF16"; @@ -173,6 +173,8 @@ protected: }; TEST_P(ConvReLUPoolConvReLUPool, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/elt_max.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/elt_max.cpp index 779079f778d..95eedfc6d81 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/elt_max.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/elt_max.cpp @@ -126,6 +126,8 @@ protected: }; TEST_P(Elt_max, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/elt_x3.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/elt_x3.cpp index a95ee393681..ff3c20e541e 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/elt_x3.cpp +++ 
b/inference-engine/tests/functional/plugin/cpu/bfloat16/elt_x3.cpp @@ -184,6 +184,8 @@ protected: }; TEST_P(Elt_x3, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/faster_100_5_1_1_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/faster_100_5_1_1_conv.cpp index 2ee786e2ee8..04928f1eed9 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/faster_100_5_1_1_conv.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/faster_100_5_1_1_conv.cpp @@ -105,12 +105,14 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["Add_4"] = "FP32"; + expectedPrecisions["Add_4"] = "ndef"; expectedPrecisions["Convolution_6"] = "BF16"; } }; TEST_P(Faster100_5_1_1_Conv, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_multiply.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_multiply.cpp index a5c2af32afd..8436ce73207 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_multiply.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_multiply.cpp @@ -105,6 +105,8 @@ protected: }; TEST_P(Gather_multiply, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_x2_add_mul_relu_concat_matmul.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_x2_add_mul_relu_concat_matmul.cpp index 2ccaa066312..74b50d158d7 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_x2_add_mul_relu_concat_matmul.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/gather_x2_add_mul_relu_concat_matmul.cpp @@ -123,7 +123,7 @@ protected: // performance counters expectedPrecisions["Matmul_0"] = "BF16"; expectedPrecisions["Mul_1"] = "BF16"; - expectedPrecisions["Add_1"] = "FP32"; + expectedPrecisions["Add_1"] = "BF16"; expectedPrecisions["Relu_1"] = "ndef"; expectedPrecisions["Conc_1"] = "BF16"; expectedPrecisions["Matmul_1"] = "BF16"; @@ -131,6 +131,8 @@ protected: }; TEST_P(Gather_x2_add_mul_relu_concat_matmul, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/memory_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/memory_conv.cpp index 1540bbacc0f..6a1c34908cb 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/memory_conv.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/memory_conv.cpp @@ -73,6 +73,8 @@ protected: }; TEST_P(MemoryConv, CheckTypeConversion) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + if (!InferenceEngine::with_cpu_x86_bfloat16()) GTEST_SKIP(); diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/mobilenet_ssd_with_branching.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/mobilenet_ssd_with_branching.cpp index 6a640bb8e5a..3215e553e57 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/mobilenet_ssd_with_branching.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/mobilenet_ssd_with_branching.cpp @@ -159,6 +159,8 @@ protected: }; TEST_P(MobileNet_ssd_with_branching, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git 
a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_conv.cpp index 0a4d0aee7a5..98f7ee43614 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_conv.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_conv.cpp @@ -123,14 +123,16 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; - expectedPrecisions["CONV_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; + expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["CONV_2"] = "BF16"; expectedPrecisions["ELT_1"] = "ndef"; } }; TEST_P(ScaleshiftConvEltwiseConv, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_relu_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_relu_conv.cpp index 2fb3871a496..014ab08e5f0 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_relu_conv.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_relu_conv.cpp @@ -127,8 +127,8 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; - expectedPrecisions["CONV_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; + expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["CONV_2"] = "BF16"; expectedPrecisions["RELU_1"] = "ndef"; expectedPrecisions["ELT_1"] = "ndef"; @@ -136,6 +136,8 @@ protected: }; TEST_P(ScaleshiftConvEltwiseReluConv, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_scaleshift.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_scaleshift.cpp index 5160bc9d9ac..8abb82066de 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_scaleshift.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_scaleshift.cpp @@ -121,13 +121,15 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["ELT_1"] = "ndef"; } }; TEST_P(ScaleshiftConvEltwiseScaleshift, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_elu_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_elu_conv.cpp index 5ba4893e2e7..f9735e60bc3 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_elu_conv.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_elu_conv.cpp @@ -109,13 +109,15 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = 
"ndef"; expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["CONV_2"] = "BF16"; } }; TEST_P(ScaleshiftConvEluConv, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_relu.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_relu.cpp index 416f6262f95..33d494658ba 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_relu.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_relu.cpp @@ -93,17 +93,19 @@ protected: fnPtr = createGraph(netPrecision); // STAGE1: - threshold = 9e-2; + threshold = 1e-1; // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["RELU_1"] = "ndef"; } }; TEST_P(ScaleshiftConvRelu, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_concat_relu.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_concat_relu.cpp index 46928de50a0..d00b5144b0f 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_concat_relu.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_concat_relu.cpp @@ -114,13 +114,15 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["CONV_2"] = "BF16"; } }; TEST_P(ScaleshiftConv_x2_ConcatRelu, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_eltwise.cpp index 99908120e0a..8a46347e5a7 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_eltwise.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_eltwise.cpp @@ -106,7 +106,7 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; expectedPrecisions["CONV_1"] = "BF16"; expectedPrecisions["CONV_2"] = "BF16"; expectedPrecisions["ELT_1"] = "ndef"; @@ -114,6 +114,8 @@ protected: }; TEST_P(ScaleshiftConv_x2_Eltwise, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed1_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed1_eltwise.cpp index 47a21b7bd86..b517c66a9e0 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed1_eltwise.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed1_eltwise.cpp @@ -106,14 +106,16 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - 
expectedPrecisions["ADD_1"] = "FP32"; + expectedPrecisions["ADD_1"] = "ndef"; expectedPrecisions["CONV_1"] = "BF16"; - expectedPrecisions["CONV_2"] = "FP32"; + expectedPrecisions["CONV_2"] = "BF16"; expectedPrecisions["ELT_1"] = "ndef"; } }; TEST_P(ScaleshiftConv_x2_mixed1_Eltwise, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed2_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed2_eltwise.cpp index 13fe62fb00e..d8601d6f9dd 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed2_eltwise.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed2_eltwise.cpp @@ -109,14 +109,16 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["CONV_1"] = "FP32"; - expectedPrecisions["ADD_2"] = "FP32"; + expectedPrecisions["CONV_1"] = "BF16"; + expectedPrecisions["ADD_2"] = "ndef"; expectedPrecisions["CONV_2"] = "BF16"; expectedPrecisions["ELT_1"] = "ndef"; } }; TEST_P(ScaleshiftConv_x2_mixed2_Eltwise, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x3_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x3_eltwise.cpp index 064692e44a8..aba2d02a111 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x3_eltwise.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x3_eltwise.cpp @@ -141,7 +141,7 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["Add_1"] = "FP32"; + expectedPrecisions["Add_1"] = "ndef"; expectedPrecisions["Convolution_1"] = "BF16"; expectedPrecisions["Convolution_2"] = "BF16"; expectedPrecisions["ELT_1"] = "ndef"; @@ -150,6 +150,8 @@ protected: }; TEST_P(ScaleshiftConv_x3_Eltwise, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x2_conv_x2_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x2_conv_x2_eltwise.cpp index 0bb63e3629c..043aa6b87a0 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x2_conv_x2_eltwise.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x2_conv_x2_eltwise.cpp @@ -128,14 +128,16 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["Add_1"] = "FP32"; - expectedPrecisions["Add_2"] = "FP32"; + expectedPrecisions["Add_1"] = "ndef"; + expectedPrecisions["Add_2"] = "ndef"; expectedPrecisions["Convolution_1"] = "BF16"; expectedPrecisions["ELT_1"] = "ndef"; } }; TEST_P(Scaleshift_x2_Conv_x2_Eltwise, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x3_conv_eltwise_relu.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x3_conv_eltwise_relu.cpp index 49dfd1bbb8e..a9916754d1e 100644 --- 
a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x3_conv_eltwise_relu.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x3_conv_eltwise_relu.cpp @@ -147,15 +147,17 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["Add_1"] = "FP32"; + expectedPrecisions["Add_1"] = "ndef"; expectedPrecisions["Convolution_1"] = "BF16"; - expectedPrecisions["Add_2"] = "FP32"; + expectedPrecisions["Add_2"] = "ndef"; expectedPrecisions["ELT_1"] = "ndef"; expectedPrecisions["RELU_1"] = "ndef"; } }; TEST_P(Scaleshift_x3_ConvEltwiseRelu, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/tail_fp32_optimization.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/tail_fp32_optimization.cpp index c9e5c71a00b..a4dcc9713ca 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/tail_fp32_optimization.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/tail_fp32_optimization.cpp @@ -112,12 +112,14 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["Add_4"] = "FP32"; + expectedPrecisions["Add_4"] = "ndef"; expectedPrecisions["Convolution_6"] = "BF16"; } }; TEST_P(PoolingAfterConv, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/topk_inputs_i32.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/topk_inputs_i32.cpp index aa8e2e8c28a..32912caca0b 100644 --- a/inference-engine/tests/functional/plugin/cpu/bfloat16/topk_inputs_i32.cpp +++ b/inference-engine/tests/functional/plugin/cpu/bfloat16/topk_inputs_i32.cpp @@ -132,7 +132,7 @@ protected: // STAGE2: // filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in // performance counters - expectedPrecisions["Add_4"] = "FP32"; + expectedPrecisions["Add_4"] = "ndef"; expectedPrecisions["Convolution_1"] = "BF16"; expectedPrecisions["Convolution_2"] = "BF16"; expectedPrecisions["TopK_1"] = "FP32"; @@ -140,6 +140,8 @@ protected: }; TEST_P(TopKInputsI32, CompareWithRefImpl) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + test(); }; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/add_output.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/add_output.cpp deleted file mode 100644 index b3bfd41267d..00000000000 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/add_output.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "behavior/add_output.hpp" -#include "functional_test_utils/test_model/test_model.hpp" -#include "functional_test_utils/plugin_cache.hpp" - -InferenceEngine::CNNNetwork getTargetNetwork() { - auto model = FuncTestUtils::TestModel::getModelWithMemory(InferenceEngine::Precision::FP32); - auto ie = PluginCache::get().ie(); - return ie->ReadNetwork(model.model_xml_str, model.weights_blob); -} - -std::vector testCases = { - addOutputsParams(getTargetNetwork(), {"Memory_1"}, CommonTestUtils::DEVICE_CPU) -}; - -INSTANTIATE_TEST_CASE_P(smoke_AddOutputBasic, AddOutputsTest, - 
::testing::ValuesIn(testCases), - AddOutputsTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/memory_states.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/memory_states.cpp deleted file mode 100644 index 548c7d05311..00000000000 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/memory_states.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "behavior/memory_states.hpp" -#include "functional_test_utils/test_model/test_model.hpp" -#include "functional_test_utils/plugin_cache.hpp" - -InferenceEngine::CNNNetwork getNetwork() { - auto model = FuncTestUtils::TestModel::getModelWithMultipleMemoryConnections(InferenceEngine::Precision::FP32); - auto ie = PluginCache::get().ie(); - return ie->ReadNetwork(model.model_xml_str, model.weights_blob); -} -std::vector memoryStateTestCases = { - memoryStateParams(getNetwork(), {"c_1-3", "r_1-3"}, CommonTestUtils::DEVICE_CPU) -}; - -INSTANTIATE_TEST_CASE_P(smoke_VariableStateBasic, VariableStateTest, - ::testing::ValuesIn(memoryStateTestCases), - VariableStateTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/set_blob.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/set_blob.cpp index ce1e87d5a51..923cf62a61c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/set_blob.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/set_blob.cpp @@ -14,6 +14,7 @@ const std::vector precisionSet = {Precision::FP32, Precision::I16, Pr const std::vector typeSet = {setType::INPUT, setType::OUTPUT, setType::BOTH}; const auto params = ::testing::Combine(::testing::ValuesIn(precisionSet), + ::testing::ValuesIn(precisionSet), ::testing::ValuesIn(typeSet), ::testing::Values(CommonTestUtils::DEVICE_CPU)); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/execution_graph_tests/runtime_precision.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/execution_graph_tests/runtime_precision.cpp index 2ddda8ae6a1..f95ba68b777 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/execution_graph_tests/runtime_precision.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/execution_graph_tests/runtime_precision.cpp @@ -19,7 +19,7 @@ const std::vector params = { {makeEltwiseFunction, {Precision::BF16, Precision::BF16}, {{"Eltwise", Precision::BF16}}}, {makeEltwiseFunction, {Precision::U8, Precision::U8}, {{"Eltwise", Precision::U8}}}, {makeEltwiseFunction, {Precision::I8, Precision::I8}, {{"Eltwise", Precision::I8}}}, - {makeFakeQuantizeReluFunction, {Precision::FP32}, {{"FakeQuantize", Precision::FP32}, {"Relu", Precision::U8}}}, + {makeFakeQuantizeReluFunction, {Precision::FP32}, {{"FakeQuantize", Precision::FP32}, {"Relu_original", Precision::U8}}}, {makeFakeQuantizeReluFunction, {Precision::U8}, {{"FakeQuantize", Precision::U8}, {"Relu", Precision::U8}}}, {makeFakeQuantizeBinaryConvolutionFunction, {Precision::FP32}, {{"FakeQuantize", Precision::FP32}, {"BinaryConvolution", Precision::BIN}}}, }; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp 
b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp index dfcee6bd5fa..6794b83175b 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp @@ -71,10 +71,16 @@ std::map, std::vector>> basic = { std::map, std::vector>> preluBasic = { {{1, 50}, {{1}, {50}}}, {{1, 128}, {{1}, {128}}}, - {{20, 128}, {{20}, {128}, {20, 128}}}, - {{1, 20, 128}, {{1}, {20}, {128}, {20, 128}}}, - {{1, 20, 128, 128}, {{1}, {20}, {128}, {128, 128}, {20, 128, 128}}}, - {{1, 20, 20, 128, 128}, {{1}, {20}, {128}, {128, 128}, {20, 128, 128}, {20, 20, 128, 128}}}, + {{20, 128}, {{128}}}, + {{1, 20, 128}, {{1}, {20}}}, + {{1, 20, 128, 128}, {{1}, {20}}}, + {{1, 20, 20, 128, 128}, {{1}, {20}}} + // according to spec second input for PRelu must be 1D and must be broadcastabe per channel + // at this moment these cases unsupported + // {{20, 128}, {{20}, {20, 128}}}, + // {{1, 20, 128}, {{128}, {20, 128}}}, + // {{1, 20, 128, 128}, {{128}, {128, 128}, {20, 128, 128}}}, + // {{1, 20, 20, 128, 128}, {{128}, {128, 128}, {20, 128, 128}, {20, 20, 128, 128}}}, }; const auto basicCases = ::testing::Combine( diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp index c82e332785f..325636314d7 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp @@ -103,6 +103,35 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values(CommonTestUtils::DEVICE_CPU)), ConvolutionLayerTest::getTestCaseName); +// weight for this convolution have Acdb16a layout +// for [96,1,7,7] shape strides for 1 and 3 dimensions equals, but not default order +namespace specificWeightLayout { + const std::vector kernels = {7, 7}; + const std::vector strides = {2, 2}; + const std::vector padBegins = {1, 1}; + const std::vector padEnds = {1, 1}; + const std::vector dilations = {1, 1}; + const size_t numOutChannels = {96}; + const auto conv2DParams_WeightLayout = ::testing::Combine(::testing::Values(kernels), + ::testing::Values(strides), + ::testing::Values(padBegins), + ::testing::Values(padEnds), + ::testing::Values(dilations), + ::testing::Values(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT)); + + INSTANTIATE_TEST_CASE_P(smoke_Convolution2D_SpecificWeightLayout, ConvolutionLayerTest, + ::testing::Combine(conv2DParams_WeightLayout, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 1, 50, 75})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionLayerTest::getTestCaseName); +} // namespace specificWeightLayout + /* ============= 3D Convolution ============= */ const std::vector> kernels3d = {{3, 3, 3}, {3, 5, 3}}; const std::vector> paddings3d = {{0, 0, 0}, {0, 2, 0}}; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/lrn.cpp 
b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/lrn.cpp index fe97e4626b6..916abf39f4c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/lrn.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/lrn.cpp @@ -10,20 +10,19 @@ using namespace LayerTestsDefinitions; -namespace { -// Common params - -const std::vector netPrecisions = {InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16}; - -const std::vector> axes = {{1}, {2, 3}}; - +const std::vector netPrecisions{ + InferenceEngine::Precision::FP32 +}; const double alpha = 9.9e-05; const double beta = 2; const double bias = 1.0; const size_t size = 5; -INSTANTIATE_TEST_CASE_P(smoke_LrnCheck, LrnLayerTest, +namespace LRN2D { + +const std::vector> axes = {{1}}; + +INSTANTIATE_TEST_CASE_P(smoke_LrnCheck_2D, LrnLayerTest, ::testing::Combine(::testing::Values(alpha), ::testing::Values(beta), ::testing::Values(bias), @@ -32,8 +31,65 @@ INSTANTIATE_TEST_CASE_P(smoke_LrnCheck, LrnLayerTest, ::testing::ValuesIn(netPrecisions), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(std::vector({10, 10, 3, 2})), + ::testing::Values(std::vector({10, 16})), ::testing::Values(CommonTestUtils::DEVICE_CPU)), LrnLayerTest::getTestCaseName); -} // namespace +} // namespace LRN2D + +namespace LRN3D { + +const std::vector> axes = {{1}, {2}}; + +INSTANTIATE_TEST_CASE_P(smoke_LrnCheck_3D, LrnLayerTest, + ::testing::Combine(::testing::Values(alpha), + ::testing::Values(beta), + ::testing::Values(bias), + ::testing::Values(size), + ::testing::ValuesIn(axes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(std::vector({6, 10, 16})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + LrnLayerTest::getTestCaseName); + +} // namespace LRN3D + +namespace LRN4D { + +const std::vector> axes = {{1}, {2, 3}, {3, 2}}; + +INSTANTIATE_TEST_CASE_P(smoke_LrnCheck_4D, LrnLayerTest, + ::testing::Combine(::testing::Values(alpha), + ::testing::Values(beta), + ::testing::Values(bias), + ::testing::Values(size), + ::testing::ValuesIn(axes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(std::vector({10, 10, 3, 8})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + LrnLayerTest::getTestCaseName); + +} // namespace LRN4D + +namespace LRN5D { + +const std::vector> axes = {{1}, {2, 3, 4}, {4, 2, 3}}; + +INSTANTIATE_TEST_CASE_P(smoke_LrnCheck_5D, LrnLayerTest, + ::testing::Combine(::testing::Values(alpha), + ::testing::Values(beta), + ::testing::Values(bias), + ::testing::Values(size), + ::testing::ValuesIn(axes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(std::vector({1, 10, 10, 7, 4})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + LrnLayerTest::getTestCaseName); + +} // namespace LRN5D diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/tile.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/tile.cpp index cac26120ba8..5894b1c1ec6 100644 --- 
a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/tile.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/tile.cpp @@ -14,16 +14,17 @@ const std::vector netPrecisions = { InferenceEngine::Precision::FP32 }; -const std::vector> repeats = { +const std::vector> repeats3D = { {1, 2, 3}, {2, 1, 1}, {2, 3, 1}, {2, 2, 2}, + {1, 1, 1} }; INSTANTIATE_TEST_CASE_P(smoke_Tile, TileLayerTest, ::testing::Combine( - ::testing::ValuesIn(repeats), + ::testing::ValuesIn(repeats3D), ::testing::ValuesIn(netPrecisions), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), @@ -33,9 +34,14 @@ INSTANTIATE_TEST_CASE_P(smoke_Tile, TileLayerTest, ::testing::Values(CommonTestUtils::DEVICE_CPU)), TileLayerTest::getTestCaseName); +const std::vector> repeats6D = { + {1, 1, 1, 2, 1, 2}, + {1, 1, 1, 1, 1, 1} +}; + INSTANTIATE_TEST_CASE_P(smoke_Tile6d, TileLayerTest, ::testing::Combine( - ::testing::Values(std::vector({1, 1, 1, 2, 1, 2})), + ::testing::ValuesIn(repeats6D), ::testing::ValuesIn(netPrecisions), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index ef6daefea09..24493069064 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -19,8 +19,6 @@ std::vector disabledTestPatterns() { // TODO: Issue 33886 R"(.*(QuantGroupConv2D).*)", R"(.*(QuantGroupConv3D).*)", - // TODO: failed to downgrade to opset v0 in interpreter backend - R"(.*Gather.*axis=-1.*)", // TODO: Issue: 34518 R"(.*RangeLayerTest.*)", R"(.*(RangeAddSubgraphTest).*Start=1.2.*Stop=(5.2|-5.2).*Step=(0.1|-0.1).*netPRC=FP16.*)", @@ -50,20 +48,30 @@ std::vector disabledTestPatterns() { // TODO: Issue 43417 sporadic issue, looks like an issue in test, reproducible only on Windows platform R"(.*decomposition1_batch=5_hidden_size=10_input_size=30_.*tanh.relu.*_clip=0_linear_before_reset=1.*_targetDevice=CPU_.*)", // Skip platforms that do not support BF16 (i.e. sse, avx, avx2) - R"(.*BF16.*(jit_avx(?!5)|jit_sse).*)", + R"(.*BF16.*(jit_avx(?!5)|jit_sse|ref).*)", // TODO: Incorrect blob sizes for node BinaryConvolution_X R"(.*BinaryConvolutionLayerTest.*)", - // TODO: 51676. Incorrect conversion of min and max limits from double to integral R"(.*ClampLayerTest.*netPrc=(I64|I32).*)", R"(.*ClampLayerTest.*netPrc=U64.*)", - // TODO: 42538. 
Unexpected application crush - R"(.*CoreThreadingTestsWithIterations\.smoke_LoadNetwork.t.*)" + R"(.*CoreThreadingTestsWithIterations\.smoke_LoadNetwork.t.*)", + + // incorrect reference implementation + R"(.*NormalizeL2LayerTest.*axes=\(\).*)", + // lpt transformation produce the same names for MatMul and Multiply + R"(.*MatMulTransformation.*)", + // incorrect jit_uni_planar_convolution with dilation = {1, 2, 1} and output channel 1 + R"(.*smoke_Convolution3D.*D=\(1.2.1\)_O=1.*)", + + // Unsupported operation of type: NormalizeL2 name : Doesn't support reduction axes: (2.2) + R"(.*BF16NetworkRestore1.*)", + R"(.*MobileNet_ssd_with_branching.*)", }; if (!InferenceEngine::with_cpu_x86_avx512_core()) { // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives, // tests are useless on such platforms retVector.emplace_back(R"(.*BF16.*)"); + retVector.emplace_back(R"(.*bfloat16.*)"); } return retVector; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/split_concat_memory.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/split_concat_memory.cpp index a115a9cbf08..a5c93c9ac9c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/split_concat_memory.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/split_concat_memory.cpp @@ -34,7 +34,3 @@ INSTANTIATE_TEST_CASE_P(smoke_CPU, SplitConcatMemory, ::testing::Values(CommonTestUtils::DEVICE_CPU)), SplitConcatMemory::getTestCaseName); } // namespace - - - - diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/activation.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/activation.cpp index 3b8d372ef82..d473bbde6e2 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/activation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/activation.cpp @@ -34,7 +34,21 @@ public: return result.str(); } InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override { - return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), 15, 0, 32768); + int32_t data_start_from; + uint32_t data_range; + int32_t resolution; + + if (activationType == ActivationTypes::Exp && netPrecision == Precision::BF16) { + data_start_from = 0; + data_range = 2; + resolution = 32768; + } else { + data_start_from = 0; + data_range = 15; + resolution = 32768; + } + + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), data_range, data_start_from, resolution); } protected: @@ -45,11 +59,10 @@ protected: std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; - InferenceEngine::Precision netPrecision; std::pair, std::vector> shapes; std::pair> activationDecl; std::tie(activationDecl, netPrecision, inPrc, outPrc, inLayout, outLayout, shapes, targetDevice) = basicParamsSet; - selectedType = getPrimitiveType() + "_" + inPrc.name(); + selectedType = getPrimitiveType() + "_" + netPrecision.name(); activationType = activationDecl.first; auto constantsValue = activationDecl.second; @@ -59,6 +72,8 @@ protected: activation->get_rt_info() = getCPUInfo(); function = std::make_shared(ngraph::NodeVector{activation}, params, "Activation"); } + + InferenceEngine::Precision netPrecision; }; TEST_P(ActivationLayerCPUTest, CompareWithRefs) { @@ -98,14 +113,14 @@ std::map, std::vector>> basic4D = { {{2, 17, 5, 4}, {{}}}, }; -std::vector bf16InpOutPrc = 
{Precision::BF16, Precision::FP32}; +std::vector netPrc = {Precision::BF16, Precision::FP32}; const auto basicCases4D = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes)), - ::testing::Values(Precision::BF16), - ::testing::ValuesIn(bf16InpOutPrc), - ::testing::ValuesIn(bf16InpOutPrc), + ::testing::ValuesIn(netPrc), + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(CommonTestUtils::combineParams(basic4D)), @@ -129,9 +144,9 @@ std::map, std::vector>> basic5D = { const auto basicCases5D = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes)), - ::testing::Values(Precision::BF16), - ::testing::ValuesIn(bf16InpOutPrc), - ::testing::ValuesIn(bf16InpOutPrc), + ::testing::ValuesIn(netPrc), + ::testing::Values(Precision::FP32), + ::testing::Values(Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(CommonTestUtils::combineParams(basic5D)), diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/batch_to_space.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/batch_to_space.cpp index 425090ad5bf..603c96ead9e 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/batch_to_space.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/batch_to_space.cpp @@ -43,9 +43,9 @@ protected: inPrc = outPrc = netPrecision; if (strcmp(netPrecision.name(), "U8") == 0) - selectedType = std::string("unknown_") + "I8"; + selectedType = std::string("ref_any_") + "I8"; else - selectedType = std::string("unknown_") + netPrecision.name(); + selectedType = std::string("ref_any_") + netPrecision.name(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convert_to_plugin_specific_node.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convert_to_plugin_specific_node.cpp new file mode 100644 index 00000000000..a59b7f6fc3b --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convert_to_plugin_specific_node.cpp @@ -0,0 +1,120 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/cpu_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using ConvertToPluginSpecificNodeParams = std::tuple; // expected number of constant node + +class ConvertToPluginSpecificNode : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + SizeVector nonConstShape, constShape; + Precision prc; + helpers::EltwiseTypes nodeType; + size_t port, constNodeNum; + std::tie(nonConstShape, constShape, prc, nodeType, port, constNodeNum) = obj.param; + + std::ostringstream result; + result << "IS_NON_CONST=" << CommonTestUtils::vec2str(nonConstShape) << "_"; + result << "IS_CONST=" << CommonTestUtils::vec2str(constShape) << "_"; + result << "PRC=" << prc << "_"; + result << "NODE=" << nodeType << "_"; + result << "PORT=" << port << "_"; 
+ result << "CONST_NUM=" << constNodeNum; + + return result.str(); + } + +protected: + size_t constNodeNum; + + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + + SizeVector nonConstShape, constShape; + Precision prc; + helpers::EltwiseTypes nodeType; + size_t port; + + std::tie(nonConstShape, constShape, prc, nodeType, port, constNodeNum) = this->GetParam(); + IE_ASSERT(shape_size(constShape) == 1); + + const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prc); + const auto param = std::make_shared(ngPrc, ngraph::Shape(nonConstShape)); + const auto constNode = builder::makeConstant(ngPrc, ngraph::Shape(constShape), std::vector{}, true); + OutputVector inputs(2); + inputs[port] = constNode; + inputs[1 - port] = param; + + auto powerStatic = ngraph::builder::makeEltwise(inputs[0], inputs[1], nodeType); + + function = std::make_shared(powerStatic, ParameterVector{param}, "ConvertToPluginSpecificNode"); + } +}; + +TEST_P(ConvertToPluginSpecificNode, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckNodeOfTypeCount(executableNetwork, "Const", constNodeNum); +} + +namespace { + +const std::vector> nonConstIS = { + {3, 4, 5, 6} +}; + +const std::vector> constIS = { + {}, + {1}, + {1, 1}, + {1, 1, 1}, + {1, 1, 1, 1}, +}; + +std::vector nodeTypes = { + ngraph::helpers::EltwiseTypes::ADD, + ngraph::helpers::EltwiseTypes::SUBTRACT, + ngraph::helpers::EltwiseTypes::MULTIPLY +}; + +const std::vector port = { + 0, 1 +}; + +const auto testParamsEltwise = ::testing::Combine(::testing::ValuesIn(nonConstIS), + ::testing::ValuesIn(constIS), + ::testing::Values(Precision::FP32), + ::testing::ValuesIn(nodeTypes), + ::testing::ValuesIn(port), + ::testing::Values(0)); + +INSTANTIATE_TEST_CASE_P(smoke_CheckEltwise, ConvertToPluginSpecificNode, testParamsEltwise, ConvertToPluginSpecificNode::getTestCaseName); + +const auto testParamsPower = ::testing::Combine(::testing::ValuesIn(nonConstIS), + ::testing::ValuesIn(constIS), + ::testing::Values(Precision::FP32), + ::testing::Values(ngraph::helpers::EltwiseTypes::POWER), + ::testing::Values(1), + ::testing::Values(0)); + +INSTANTIATE_TEST_CASE_P(smoke_CheckPower, ConvertToPluginSpecificNode, testParamsPower, ConvertToPluginSpecificNode::getTestCaseName); + +} // namespace + +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp index 597b6d053b7..2924da5a93a 100755 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp @@ -100,6 +100,14 @@ TEST_P(ConvolutionLayerCPUTest, CompareWithRefs) { namespace { +const auto fusingPRelu1D = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + auto shape = inpNode->get_shape(); + ngraph::Shape newShape({shape[1]}); + auto data = NGraphFunctions::Utils::generateVector(ngraph::shape_size(newShape)); + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, newShape, data); + }, "PRelu1D"}}), {"PRelu"}}; + /* COMMON PARAMS */ const std::vector fusingParamsSet{ emptyFusingSpec, @@ -108,17 +116,19 @@ const std::vector fusingParamsSet{ fusingElu, fusingSigmoid, fusingClamp, - fusingPRelu, + fusingPReluPerChannel, fusingSwish, fusingHSwish, fusingMish, fusingSoftPlus, // 
other patterns + fusingReluAdd, fusingReluScaleShift, fusingFakeQuantizePerTensorRelu, fusingFakeQuantizePerChannelRelu, fusingSumEluFQ, - fusingSum + fusingSum, + fusingPRelu1D }; const std::vector fusingParamsSetBF16{ @@ -128,9 +138,10 @@ const std::vector fusingParamsSetBF16{ fusingElu, fusingSigmoid, fusingClamp, - fusingPRelu, + fusingPReluPerChannel, fusingSwish, // other patterns + fusingReluAdd, fusingReluScaleShift, fusingSum }; diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/eltwise.cpp index 2079dd7945a..d5ab59afc7c 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/eltwise.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/eltwise.cpp @@ -47,7 +47,7 @@ protected: std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - selectedType = getPrimitiveType() + "_" + inPrc.name(); + selectedType = getPrimitiveType() + "_" + netPrecision.name(); std::vector inputShape1, inputShape2; if (inputShapes.size() == 1) { @@ -124,9 +124,8 @@ std::vector opTypes = { std::vector eltwiseOpTypesBinInp = { ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY, - // TODO: Disabled because memory formats filter is not propogated through ngraph transformations -// ngraph::helpers::EltwiseTypes::SUBTRACT, -// ngraph::helpers::EltwiseTypes::DIVIDE, + ngraph::helpers::EltwiseTypes::SUBTRACT, + ngraph::helpers::EltwiseTypes::DIVIDE, ngraph::helpers::EltwiseTypes::FLOOR_MOD, ngraph::helpers::EltwiseTypes::SQUARED_DIFF, }; @@ -138,7 +137,7 @@ std::vector eltwiseOpTypesDiffInp = { // Differen std::map additional_config; -std::vector bf16InpOutPrc = {Precision::BF16, Precision::FP32}; +std::vector netPrc = {Precision::BF16, Precision::FP32}; std::vector>> inShapes_4D = { @@ -154,13 +153,13 @@ std::vector cpuParams_4D = { CPUSpecificParams({nchw, nchw}, {nchw}, {}, {}) }; -const auto params_4D_FP32 = ::testing::Combine( +const auto params_4D = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inShapes_4D), ::testing::ValuesIn(eltwiseOpTypesBinInp), ::testing::ValuesIn(secondaryInputTypes), ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(netPrc), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), @@ -168,39 +167,23 @@ const auto params_4D_FP32 = ::testing::Combine( ::testing::Values(additional_config)), ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D))); -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32_MemOrder, EltwiseLayerCPUTest, params_4D_FP32, EltwiseLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_MemOrder, EltwiseLayerCPUTest, params_4D, EltwiseLayerCPUTest::getTestCaseName); -const auto params_4D_BF16 = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inShapes_4D), - ::testing::ValuesIn(eltwiseOpTypesBinInp), - ::testing::ValuesIn(secondaryInputTypes), - ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::ValuesIn(bf16InpOutPrc), - ::testing::ValuesIn(bf16InpOutPrc), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(additional_config)), - ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D))); 
- -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_BF16_MemOrder, EltwiseLayerCPUTest, params_4D_BF16, EltwiseLayerCPUTest::getTestCaseName); - -const auto params_4D_BF16_emptyCPUSpec = ::testing::Combine( +const auto params_4D_emptyCPUSpec = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inShapes_4D), ::testing::ValuesIn(eltwiseOpTypesDiffInp), ::testing::ValuesIn(secondaryInputTypes), ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::ValuesIn(bf16InpOutPrc), - ::testing::ValuesIn(bf16InpOutPrc), + ::testing::ValuesIn(netPrc), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(additional_config)), ::testing::Values(emptyCPUSpec)); -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_BF16, EltwiseLayerCPUTest, params_4D_BF16_emptyCPUSpec, EltwiseLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_emptyCPUSpec, EltwiseLayerCPUTest, params_4D_emptyCPUSpec, EltwiseLayerCPUTest::getTestCaseName); std::vector>> inShapes_5D = { {{2, 4, 3, 4, 1}}, @@ -215,13 +198,13 @@ std::vector cpuParams_5D = { CPUSpecificParams({ncdhw, ncdhw}, {ncdhw}, {}, {}) }; -const auto params_5D_FP32 = ::testing::Combine( +const auto params_5D = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inShapes_5D), ::testing::ValuesIn(eltwiseOpTypesBinInp), ::testing::ValuesIn(secondaryInputTypes), ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(netPrc), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), @@ -229,39 +212,23 @@ const auto params_5D_FP32 = ::testing::Combine( ::testing::Values(additional_config)), ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D))); -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32_MemOrder, EltwiseLayerCPUTest, params_5D_FP32, EltwiseLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_MemOrder, EltwiseLayerCPUTest, params_5D, EltwiseLayerCPUTest::getTestCaseName); -const auto params_5D_BF16 = ::testing::Combine( - ::testing::Combine( - ::testing::ValuesIn(inShapes_5D), - ::testing::ValuesIn(eltwiseOpTypesBinInp), - ::testing::ValuesIn(secondaryInputTypes), - ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::ValuesIn(bf16InpOutPrc), - ::testing::ValuesIn(bf16InpOutPrc), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::Values(additional_config)), - ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D))); - -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_BF16_MemOrder, EltwiseLayerCPUTest, params_5D_BF16, EltwiseLayerCPUTest::getTestCaseName); - -const auto params_5D_BF16_emptyCPUSpec = ::testing::Combine( +const auto params_5D_emptyCPUSpec = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inShapes_5D), ::testing::ValuesIn(eltwiseOpTypesDiffInp), ::testing::ValuesIn(secondaryInputTypes), ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::ValuesIn(bf16InpOutPrc), - ::testing::ValuesIn(bf16InpOutPrc), + ::testing::ValuesIn(netPrc), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::Values(InferenceEngine::Precision::FP32), 
::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(additional_config)), ::testing::Values(emptyCPUSpec)); -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_BF16, EltwiseLayerCPUTest, params_5D_BF16_emptyCPUSpec, EltwiseLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D, EltwiseLayerCPUTest, params_5D_emptyCPUSpec, EltwiseLayerCPUTest::getTestCaseName); std::vector>> inShapes_4D_Blocked_Planar = { {{2, 17, 31, 3}, {2, 1, 31, 3}}, @@ -272,13 +239,13 @@ std::vector cpuParams_4D_Blocked_Planar = { CPUSpecificParams({nChw16c, nchw}, {nChw16c}, {}, {}), }; -const auto params_4D_FP32_Blocked_Planar = ::testing::Combine( +const auto params_4D_Blocked_Planar = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inShapes_4D_Blocked_Planar), ::testing::ValuesIn(eltwiseOpTypesBinInp), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(netPrc), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), @@ -286,7 +253,7 @@ const auto params_4D_FP32_Blocked_Planar = ::testing::Combine( ::testing::Values(additional_config)), ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Blocked_Planar))); -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32_Blocked_Planar, EltwiseLayerCPUTest, params_4D_FP32_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_Blocked_Planar, EltwiseLayerCPUTest, params_4D_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName); std::vector>> inShapes_4D_Planar_Blocked = { @@ -298,13 +265,13 @@ std::vector cpuParams_4D_Planar_Blocked = { CPUSpecificParams({nchw, nChw16c}, {nChw16c}, {}, {}), }; -const auto params_4D_FP32_Planar_Blocked = ::testing::Combine( +const auto params_4D_Planar_Blocked = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inShapes_4D_Planar_Blocked), ::testing::ValuesIn(eltwiseOpTypesBinInp), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(netPrc), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), @@ -312,7 +279,7 @@ const auto params_4D_FP32_Planar_Blocked = ::testing::Combine( ::testing::Values(additional_config)), ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Planar_Blocked))); -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32_Planar_Blocked, EltwiseLayerCPUTest, params_4D_FP32_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_Planar_Blocked, EltwiseLayerCPUTest, params_4D_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName); std::vector>> inShapes_5D_Blocked_Planar = { @@ -324,13 +291,13 @@ std::vector cpuParams_5D_Blocked_Planar = { CPUSpecificParams({nCdhw16c, ncdhw}, {nCdhw16c}, {}, {}), }; -const auto params_5D_FP32_Blocked_Planar = ::testing::Combine( +const auto params_5D_Blocked_Planar = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inShapes_5D_Blocked_Planar), ::testing::ValuesIn(eltwiseOpTypesBinInp), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::FP32), + 
::testing::ValuesIn(netPrc), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), @@ -338,7 +305,7 @@ const auto params_5D_FP32_Blocked_Planar = ::testing::Combine( ::testing::Values(additional_config)), ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Blocked_Planar))); -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32_Blocked_Planar, EltwiseLayerCPUTest, params_5D_FP32_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_Blocked_Planar, EltwiseLayerCPUTest, params_5D_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName); std::vector>> inShapes_5D_Planar_Blocked = { @@ -350,13 +317,13 @@ std::vector cpuParams_5D_Planar_Blocked = { CPUSpecificParams({ncdhw, nCdhw16c}, {nCdhw16c}, {}, {}), }; -const auto params_5D_FP32_Planar_Blocked = ::testing::Combine( +const auto params_5D_Planar_Blocked = ::testing::Combine( ::testing::Combine( ::testing::ValuesIn(inShapes_5D_Planar_Blocked), ::testing::ValuesIn(eltwiseOpTypesBinInp), ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT), ::testing::ValuesIn(opTypes), - ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(netPrc), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), @@ -364,7 +331,7 @@ const auto params_5D_FP32_Planar_Blocked = ::testing::Combine( ::testing::Values(additional_config)), ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Planar_Blocked))); -INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32_Planar_Blocked, EltwiseLayerCPUTest, params_5D_FP32_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_Planar_Blocked, EltwiseLayerCPUTest, params_5D_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName); } // namespace } // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp index ac1c4467a22..a5832b40835 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp @@ -114,7 +114,7 @@ protected: auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty()); auto fq = std::make_shared(paramOuts[0], il, ih, ol, oh, levels); - layerName = shouldBeDecomposed ? "" : "Quantize"; + layerName = shouldBeDecomposed ? 
"" : "FakeQuantize"; if (selectedType.empty()) { selectedType = getPrimitiveType() + "_" + inPrec.name(); @@ -285,4 +285,4 @@ INSTANTIATE_TEST_CASE_P(smoke_FakeQuantizeLayerCPUTest_Decompos, FakeQuantizeLay } // namespace fqDecompos -} // namespace CPULayerTestsDefinitions \ No newline at end of file +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gather_elements.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gather_elements.cpp index 897783cd85d..7ea6cb505ba 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gather_elements.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gather_elements.cpp @@ -51,7 +51,7 @@ protected: std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(dataShape, indicesShape, axis, dPrecision, iPrecision, targetDevice) = basicParamsSet; - selectedType = std::string("unknown_") + dPrecision.name(); + selectedType = std::string("ref_any_") + dPrecision.name(); auto ngDPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(dPrecision); auto ngIPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(iPrecision); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp index b3267e7e199..17a7d79036c 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp @@ -111,6 +111,14 @@ std::vector filterParamsSetForDevice(std::vector } /* ===================== */ +const auto fusingPRelu1D = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + auto shape = inpNode->get_shape(); + ngraph::Shape newShape({shape[1]}); + auto data = NGraphFunctions::Utils::generateVector(ngraph::shape_size(newShape)); + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, newShape, data); + }, "PRelu1D"}}), {"PRelu"}}; + /* COMMON PARAMS */ std::vector fusingParamsSet { emptyFusingSpec, @@ -119,7 +127,7 @@ std::vector fusingParamsSet { fusingElu, fusingSigmoid, fusingClamp, - fusingPRelu, + fusingPReluPerChannel, fusingSwish, fusingHSwish, fusingMish, @@ -130,6 +138,7 @@ std::vector fusingParamsSet { fusingFakeQuantizePerChannelRelu, fusingSumEluFQ, fusingSum, + fusingPRelu1D }; diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp index 5efa57cb808..b3f7b213136 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp @@ -72,6 +72,12 @@ protected: {num_directions, (linear_before_reset ? 
4 : 3) * hidden_size}}, }; + // method MKLDNNMemoryDesc::isSame can't correct compute layout for tensor with strides = 1 + // returned output format always tnc + if (inFmts.size() == 2 && ngraph::shape_size(inputShapes[1]) == 1) { + inFmts[1] = tnc; + } + configuration.insert(additionalConfig.begin(), additionalConfig.end()); if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { @@ -104,6 +110,19 @@ protected: true, direction, m_mode); + + // method MKLDNNMemoryDesc::isSame can't correct compute layout for tensor with strides = 1 + // returned output format always tnc + if (ngraph::shape_size(gru_sequence->get_output_shape(0)) == 1) { + outFmts[0] = tnc; + } else if (ngraph::shape_size(gru_sequence->get_output_shape(1)) == 1) { + outFmts[1] = tnc; + } + // if output format equals for all outputs, runtime info return only one formats + if (outFmts[0] == outFmts[1]) { + outFmts.erase(outFmts.begin()); + } + ngraph::ResultVector results{std::make_shared(gru_sequence->output(0)), std::make_shared(gru_sequence->output(1))}; @@ -151,8 +170,8 @@ namespace { std::vector> additionalConfig = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; -CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"}; -CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};; +CPUSpecificParams cpuParams{{ntc, ntc}, {tnc, ntc}, {"ref_any"}, "ref_any"}; +CPUSpecificParams cpuParamsBatchSizeOne{{ntc, ntc}, {tnc, ntc}, {"ref_any"}, "ref_any"};; std::vector mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ}; // output values increase rapidly without clip, so use only seq_lenghts = 2 diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp index ec57da59409..174c4725557 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp @@ -101,7 +101,10 @@ protected: selectedType = getPrimitiveType(); } selectedType.push_back('_'); - selectedType += netPrecision.name(); + if (additionalConfig.count(PluginConfigParams::KEY_ENFORCE_BF16) && additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) + selectedType += "BF16"; + else + selectedType += netPrecision.name(); } }; @@ -327,7 +330,6 @@ std::vector filterCPUInfoForDevice5D() { if (with_cpu_x86_avx512f()) { resCPUParams.push_back(CPUSpecificParams{{nCdhw16c, x, x, x}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"}); resCPUParams.push_back(CPUSpecificParams{{ndhwc, x, x, x}, {ndhwc}, {"jit_avx512"}, "jit_avx512"}); - resCPUParams.push_back(CPUSpecificParams{{ncdhw, x, x, x}, {ncdhw}, {"jit_avx2"}, "jit_avx2"}); } else if (with_cpu_x86_avx2()) { resCPUParams.push_back(CPUSpecificParams{{nCdhw8c, x, x, x}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"}); resCPUParams.push_back(CPUSpecificParams{{ndhwc, x, x, x}, {ndhwc}, {"jit_avx2"}, "jit_avx2"}); @@ -366,7 +368,7 @@ const auto interpolateCasesLinearOnnx5D = ::testing::Combine( ::testing::ValuesIn(defaultScales5D)); const auto interpolateCasesNN5D = ::testing::Combine( - ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx), + ::testing::Values(ngraph::op::v4::Interpolate::InterpolateMode::nearest), ::testing::ValuesIn(shapeCalculationMode), ::testing::ValuesIn(coordinateTransformModes), 
::testing::ValuesIn(defNearestModes), diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp index 50e9717de51..538ca675a7f 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp @@ -74,6 +74,16 @@ protected: {num_directions, 4 * hidden_size}}, }; + // method MKLDNNMemoryDesc::isSame can't correct compute layout for tensor with strides = 1 + // returned output format always tnc + if (inFmts.size() >= 3) { + for (size_t i = 1; i < 3; i++) { + if (ngraph::shape_size(inputShapes[i]) == 1) { + inFmts[i] = tnc; + } + } + } + configuration.insert(additionalConfig.begin(), additionalConfig.end()); if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { @@ -104,6 +114,21 @@ protected: true, direction, m_mode); + + // method MKLDNNMemoryDesc::isSame can't correct compute layout for tensor with strides = 1 + // returned output format always tnc + if (outFmts.size() >= 3) { + for (size_t i = 1; i < 3; i++) { + if (ngraph::shape_size(lstm_sequence->get_output_shape(i)) == 1) { + outFmts[i] = tnc; + } + } + } + // if output format equals for all outputs, runtime info return only one formats + if (std::adjacent_find(outFmts.begin(), outFmts.end(), std::not_equal_to()) == outFmts.end()) { + outFmts.resize(1); + } + ngraph::ResultVector results{std::make_shared(lstm_sequence->output(0)), std::make_shared(lstm_sequence->output(1)), std::make_shared(lstm_sequence->output(2))}; @@ -154,8 +179,8 @@ std::vector> additionalConfig = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; -CPUSpecificParams cpuParams{{ntc, nc, nc}, {ntc, nc, nc}, {"ref_any"}, "ref_any"}; -CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc, nc}, {tnc, nc, nc}, {"ref_any"}, "ref_any"}; +CPUSpecificParams cpuParams{{ntc, ntc, ntc}, {tnc, ntc, ntc}, {"ref_any"}, "ref_any"}; +CPUSpecificParams cpuParamsBatchSizeOne{{ntc, ntc, ntc}, {tnc, ntc, ntc}, {"ref_any"}, "ref_any"}; std::vector mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ}; std::vector seq_lengths_zero_clip{2}; diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp new file mode 100644 index 00000000000..020d9ffc3bf --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp @@ -0,0 +1,205 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "test_utils/fusing_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace CPUTestUtils; +using namespace LayerTestsDefinitions; + +namespace CPULayerTestsDefinitions { + +enum class MatMulNodeType { + MatMul, + FullyConnected +}; + +using MatMulLayerTestParams = std::tuple, + Precision, + helpers::InputLayerType, + bool, + bool>; + +using MatMulLayerCPUTestParamSet = std::tuple; + +class MatMulLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + MatMulLayerTestParams basicParamsSet; + fusingSpecificParams fusingParams; + MatMulNodeType nodeType; + 
std::tie(basicParamsSet, nodeType, fusingParams) = obj.param; + + std::pair IS; + SizeVector isA, isB; + bool transpA, transpB; + Precision prec; + helpers::InputLayerType typeB; + std::tie(IS, prec, typeB, transpA, transpB) = basicParamsSet; + isA = IS.first; isB = IS.second; + + std::ostringstream result; + result << (nodeType == MatMulNodeType::MatMul ? "MatMul_" : "FullyConnected_"); + result << "IS_A=" << CommonTestUtils::vec2str(isA) << "_"; + result << "IS_B=" << CommonTestUtils::vec2str(isB) << "_"; + result << "Transp_A=" << transpA << "_"; + result << "Transp_B=" << transpB << "_"; + result << "Prec=" << prec << "_"; + result << "typeB=" << typeB; + + result << CpuTestWithFusing::getTestCaseName(fusingParams); + + return result.str(); + } + +protected: + std::string cpuNodeType; + + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + MatMulLayerTestParams basicParamsSet; + MatMulNodeType nodeType; + fusingSpecificParams fusingParams; + std::tie(basicParamsSet, nodeType, fusingParams) = this->GetParam(); + + cpuNodeType = nodeType == MatMulNodeType::MatMul ? "MatMul" : "FullyConnected"; + + std::pair IS; + SizeVector isA, isB; + bool transpA, transpB; + Precision prec; + helpers::InputLayerType typeB; + std::tie(IS, prec, typeB, transpA, transpB) = basicParamsSet; + + isA = IS.first; isB = IS.second; + if (transpA) { + IE_ASSERT(isA.size() > 1); + std::swap(*(isA.end() - 1), *(isA.end() - 2)); + } + if (transpB) { + IE_ASSERT(isB.size() > 1); + std::swap(*(isB.end() - 1), *(isB.end() - 2)); + } + + auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prec); + auto params = builder::makeParams(ngPrec, {isA}); + auto matrixB = builder::makeInputLayer(ngPrec, typeB, isB); + if (typeB == helpers::InputLayerType::PARAMETER) { + params.push_back(std::dynamic_pointer_cast(matrixB)); + } + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); + auto matMul = builder::makeMatMul(paramOuts[0], matrixB, transpA, transpB); + function = makeNgraphFunction(ngPrec, params, matMul, cpuNodeType); + checkFusingPosition = false; + } +}; + +TEST_P(MatMulLayerCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckFusingResults(executableNetwork, cpuNodeType); +} + +namespace { + +/* ============= Common params ============= */ +const std::vector transpose = { + true, false +}; + +/* ============= FullyConnected ============= */ +namespace fullyConnected { + +const auto fusingBiasFC = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const element::Type& ngPrc, ParameterVector& params) { + auto bias = builder::makeConstant(ngPrc, Shape({inpNode->get_input_shape(1).back()}), std::vector{}, true); + return std::make_shared(inpNode, bias); + }, "fusingBiasFC"}}), {"Add"}}; + +const std::vector> IS2D = { + {{59, 1}, {1, 120}}, + {{59, 120}, {120, 1}}, + {{1, 120}, {120, 59}}, + {{71, 128}, {128, 20}} +}; + +std::vector fusingParamsSet2D { + emptyFusingSpec, + fusingBiasFC, + fusingRelu, + fusingMultiplyPerChannel, + fusingPReluPerTensor +}; + +const auto fullyConnectedParams2D = ::testing::Combine(::testing::ValuesIn(IS2D), + ::testing::Values(Precision::FP32), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::ValuesIn(transpose), + ::testing::ValuesIn(transpose)); + +const auto testParams2D = ::testing::Combine(fullyConnectedParams2D, + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet2D)); + +INSTANTIATE_TEST_CASE_P(smoke_Check_2D, 
MatMulLayerCPUTest, testParams2D, MatMulLayerCPUTest::getTestCaseName); + +const std::vector> IS3D = { + {{1, 32, 120}, {120, 5}}, + {{7, 32, 120}, {120, 50}} +}; + +std::vector fusingParamsSet3D { + emptyFusingSpec, + fusingBiasFC +}; + +const auto fullyConnectedParams3D = ::testing::Combine(::testing::ValuesIn(IS3D), + ::testing::Values(Precision::FP32), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::ValuesIn(transpose), + ::testing::ValuesIn(transpose)); + +const auto testParams3D = ::testing::Combine(fullyConnectedParams3D, + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet3D)); + +INSTANTIATE_TEST_CASE_P(smoke_Check_3D, MatMulLayerCPUTest, testParams3D, MatMulLayerCPUTest::getTestCaseName); + +}; // namespace fullyConnected + + +/* ============= Gemm ============= */ +namespace gemm { + +const std::vector> IS = { + {{1, 2, 32, 120}, {120, 5}}, + {{7, 32, 120}, {3, 7, 120, 50}}, + {{10, 10, 10}, {10, 10, 10}}, + {{55, 12}, {12, 55}} +}; + +const auto gemmParams = ::testing::Combine(::testing::ValuesIn(IS), + ::testing::Values(Precision::FP32), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::ValuesIn(transpose), + ::testing::ValuesIn(transpose)); + +const auto testParams = ::testing::Combine(gemmParams, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::Values(emptyFusingSpec)); + +INSTANTIATE_TEST_CASE_P(smoke_Check, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName); + +}; // namespace gemm + +} // namespace + +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp index bda32ff8c96..74b265d1935 100755 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp @@ -48,20 +48,18 @@ protected: float eps; op::EpsMode eps_mode; SizeVector inputShapes; - Precision netPrecision; - std::tie(axes, eps, eps_mode, inputShapes, netPrecision, targetDevice) = basicParamsSet; + std::tie(axes, eps, eps_mode, inputShapes, inPrc, targetDevice) = basicParamsSet; - inPrc = outPrc = netPrecision; - auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + outPrc = inPrc; + auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc); auto params = builder::makeParams(netPrc, {inputShapes}); auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(params)); auto normalize = builder::makeNormalizeL2(paramOuts[0], axes, eps, eps_mode); function = makeNgraphFunction(netPrc, params, normalize, "Normalize"); - selectedType = "unknown_" + std::string(netPrecision.name()); + selectedType = "unknown_" + std::string(inPrc.name()); threshold = 0.015f; - checkFusingPosition = false; } }; @@ -69,31 +67,27 @@ TEST_P(NormalizeL2LayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckPluginRelatedResults(executableNetwork, "Normalize"); + CheckPluginRelatedResults(executableNetwork, "NormalizeL2"); } namespace { /* ============= Common params ============= */ -const auto fusingMultiplySharedChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - SizeVector secondMultInShape(1, 1); - auto secondMultInput = builder::makeConstant(ngPrc, Shape(secondMultInShape), std::vector{}, 
true); - return std::make_shared(inpNode, secondMultInput); - }, "Multiply(SharedChannel)"}}), {"Multiply"}}; - -const auto fusingMultiplyNoSharedChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - SizeVector secondMultInShape(inpNode->get_shape().size(), 1); - secondMultInShape[1] = inpNode->get_shape()[1]; - auto secondMultInput = builder::makeConstant(ngPrc, Shape(secondMultInShape), std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); - }, "Multiply(NoSharedChannel)"}}), {"Multiply"}}; - std::vector fusingParamsSet { emptyFusingSpec, - fusingMultiplySharedChannel, - fusingMultiplyNoSharedChannel + fusingMultiplyPerTensor, + fusingMultiplyPerChannel, + fusingAddPerTensor, + fusingAddPerChannel, + fusingSubtractPerTensor, + fusingSubtractPerChannel, + fusingDividePerTensor, + fusingDividePerChannel, + fusingPReluPerChannel, + fusingPReluPerTensor, + fusingRelu, + fusingGelu, + fusingReluScaleShift }; const float epsilon = 1e-4f; diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/one_hot.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/one_hot.cpp index f80298200c9..4e966d6c0f9 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/one_hot.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/one_hot.cpp @@ -61,11 +61,12 @@ protected: std::tie(inputShape, axis, depth, onValue, offValue, netPrecision, inPrc, outPrc, targetDevice, cpuParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; - selectedType = std::string("unknown_") + inPrc.name(); + selectedType = std::string("ref_any_") + inPrc.name(); + auto ngOutPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc); auto depthConst = ngraph::builder::makeConstant(ngraph::element::i32, {}, {depth}); - auto onConst = ngraph::builder::makeConstant(ngraph::element::f32, {}, {onValue}); - auto offConst = ngraph::builder::makeConstant(ngraph::element::f32, {}, {offValue}); + auto onConst = ngraph::builder::makeConstant(ngOutPrc, {}, {onValue}); + auto offConst = ngraph::builder::makeConstant(ngOutPrc, {}, {offValue}); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto inputParams = ngraph::builder::makeParams(ngPrc, { inputShape }); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/psroi_pooling.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/psroi_pooling.cpp index 585588b1767..517badc6915 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/psroi_pooling.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/psroi_pooling.cpp @@ -92,7 +92,7 @@ protected: auto psroi = std::make_shared(params[0], coords, outputDim, groupSize, spatialScale, spatialBinsX, spatialBinsY, mode); psroi->get_rt_info() = getCPUInfo(); - selectedType = std::string("unknown_") + inPrc.name(); + selectedType = getPrimitiveType() + "_" + inPrc.name(); threshold = 0.001f; const ngraph::ResultVector results{std::make_shared(psroi)}; diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/reduce_ops.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/reduce_ops.cpp index 38d0d083769..dc528b82fad 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/reduce_ops.cpp +++ 
b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/reduce_ops.cpp @@ -43,7 +43,7 @@ protected: std::vector axes; CommonTestUtils::OpType opType; std::tie(axes, opType, keepDims, reductionType, netPrecision, inPrc, outPrc, inLayout, inputShape, targetDevice) = basicParamsSet; - + inPrc = outPrc = netPrecision; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); auto paramOuts = ngraph::helpers::convert2OutputVector( @@ -68,7 +68,7 @@ protected: const auto reduce = ngraph::builder::makeReduce(paramOuts[0], reductionAxesNode, keepDims, reductionType); - selectedType = getPrimitiveType() + "_" + inPrc.name(); + selectedType = getPrimitiveType() + "_" + (inPrc == Precision::BOOL ? "I8" : inPrc.name()); reduce->get_rt_info() = getCPUInfo(); @@ -103,18 +103,8 @@ private: TEST_P(ReduceCPULayerTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() - auto ops = function->get_ordered_ops(); - std::string name = (*(++ops.rbegin()))->get_type_name(); - - if ("ReduceLogicalAnd" == name) { - name = "ReduceAnd"; - } - if ("ReduceLogicalOr" == name) { - name = "ReduceOr"; - } - Run(); - CheckPluginRelatedResults(executableNetwork, name); + CheckPluginRelatedResults(executableNetwork, "Reduce"); } namespace { std::vector inpOutPrc = {Precision::BF16, Precision::FP32}; @@ -186,9 +176,9 @@ const auto paramsOneAxis = ::testing::Combine( testing::ValuesIn(opTypes), testing::ValuesIn(keepDims), testing::ValuesIn(reductionTypes), - testing::Values(InferenceEngine::Precision::FP32), - testing::ValuesIn(inpOutPrc), testing::ValuesIn(inpOutPrc), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Layout::ANY), testing::ValuesIn(inputShapes), testing::Values(CommonTestUtils::DEVICE_CPU)), @@ -201,8 +191,8 @@ const auto paramsOneAxisLogical = testing::Combine( testing::ValuesIn(keepDims), testing::ValuesIn(reductionLogicalTypes), testing::Values(InferenceEngine::Precision::BOOL), - testing::ValuesIn(inpOutPrc), - testing::ValuesIn(inpOutPrc), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Layout::ANY), testing::ValuesIn(inputShapes), testing::Values(CommonTestUtils::DEVICE_CPU)), @@ -214,9 +204,9 @@ const auto params_MultiAxis = testing::Combine( testing::Values(opTypes[1]), testing::Values(false), testing::ValuesIn(reductionTypes), - testing::Values(InferenceEngine::Precision::FP32), - testing::ValuesIn(inpOutPrc), testing::ValuesIn(inpOutPrc), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Layout::ANY), testing::Values(std::vector{2, 9, 2, 9}), testing::Values(CommonTestUtils::DEVICE_CPU)), @@ -228,9 +218,9 @@ const auto params_MultiAxis_4D = testing::Combine( testing::Values(opTypes[1]), testing::Values(true), testing::ValuesIn(reductionTypes), - testing::Values(InferenceEngine::Precision::FP32), - testing::ValuesIn(inpOutPrc), testing::ValuesIn(inpOutPrc), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Layout::ANY), testing::Values(std::vector{2, 19, 2, 9}), testing::Values(CommonTestUtils::DEVICE_CPU)), @@ -242,9 +232,9 @@ const auto params_MultiAxis_5D = testing::Combine( 
testing::Values(opTypes[1]), testing::Values(true), testing::ValuesIn(reductionTypes), - testing::Values(InferenceEngine::Precision::FP32), - testing::ValuesIn(inpOutPrc), testing::ValuesIn(inpOutPrc), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Layout::ANY), testing::Values(std::vector{2, 19, 7, 2, 9}), testing::Values(CommonTestUtils::DEVICE_CPU)), @@ -257,8 +247,8 @@ const auto params_MultiAxisLogical = testing::Combine( testing::Values(false), testing::ValuesIn(reductionLogicalTypes), testing::Values(InferenceEngine::Precision::BOOL), - testing::ValuesIn(inpOutPrc), - testing::ValuesIn(inpOutPrc), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Layout::ANY), testing::Values(std::vector{2, 9, 2, 9}), testing::Values(CommonTestUtils::DEVICE_CPU)), @@ -271,8 +261,8 @@ const auto params_MultiAxisLogical4D = testing::Combine( testing::Values(true), testing::ValuesIn(reductionLogicalTypes), testing::Values(InferenceEngine::Precision::BOOL), - testing::ValuesIn(inpOutPrc), - testing::ValuesIn(inpOutPrc), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Layout::ANY), testing::Values(std::vector{2, 19, 2, 9}), testing::Values(CommonTestUtils::DEVICE_CPU)), @@ -285,8 +275,8 @@ const auto params_MultiAxisLogical5D = testing::Combine( testing::Values(true), testing::ValuesIn(reductionLogicalTypes), testing::Values(InferenceEngine::Precision::BOOL), - testing::ValuesIn(inpOutPrc), - testing::ValuesIn(inpOutPrc), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), + testing::Values(InferenceEngine::Precision::UNSPECIFIED), testing::Values(InferenceEngine::Layout::ANY), testing::Values(std::vector{2, 19, 7, 2, 9}), testing::Values(CommonTestUtils::DEVICE_CPU)), diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/region_yolo.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/region_yolo.cpp index 3d8718f2045..46a364178df 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/region_yolo.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/region_yolo.cpp @@ -67,7 +67,7 @@ protected: configuration.insert(additionalConfig.begin(), additionalConfig.end()); - selectedType = std::string("unknown_") + inPrc.name(); + selectedType = getPrimitiveType() + "_" + inPrc.name(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc); auto param = std::make_shared(ngPrc, inputShape); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp index 671539db351..009dcdd01f2 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp @@ -148,8 +148,8 @@ namespace { std::vector> additionalConfig = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; -CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"}; -CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"}; +CPUSpecificParams cpuParams{{ntc, ntc}, {tnc, ntc}, {"ref_any"}, 
"ref_any"}; +CPUSpecificParams cpuParamsBatchSizeOne{{ntc, ntc}, {tnc, ntc}, {"ref_any"}, "ref_any"}; std::vector mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ}; // output values increase rapidly without clip, so use only seq_lenghts = 2 diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp index 1bd230d091c..d672629ab0c 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp @@ -74,7 +74,7 @@ TEST_P(SoftMaxLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckPluginRelatedResults(executableNetwork, "SoftMax"); + CheckPluginRelatedResults(executableNetwork, "Softmax"); } namespace { diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/space_to_batch.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/space_to_batch.cpp index 3677b889bfb..73bae4cb35e 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/space_to_batch.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/space_to_batch.cpp @@ -48,9 +48,9 @@ protected: inPrc = outPrc = netPrecision; if (strcmp(netPrecision.name(), "U8") == 0) - selectedType = std::string("unknown_") + "I8"; + selectedType = std::string("ref_any_") + "I8"; else - selectedType = std::string("unknown_") + netPrecision.name(); + selectedType = std::string("ref_any_") + netPrecision.name(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/permute.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp similarity index 88% rename from inference-engine/tests/functional/plugin/cpu/single_layer_tests/permute.cpp rename to inference-engine/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp index e95cb6ffa5f..b3cb50b40b3 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/permute.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp @@ -6,7 +6,7 @@ #include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" -// Since the Transpose ngraph operation is converted to the permute node, we will use it in the permute test +// Since the Transpose ngraph operation is converted to the transpose node, we will use it in the transpose test using namespace InferenceEngine; using namespace CPUTestUtils; @@ -19,12 +19,12 @@ typedef std::tuple< std::vector, // Input shapes std::string, // Target device name std::map, // Additional network configuration - CPUSpecificParams> PermuteLayerCPUTestParamSet; + CPUSpecificParams> TransposeLayerCPUTestParamSet; -class PermuteLayerCPUTest : public testing::WithParamInterface, +class TransposeLayerCPUTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { public: - static std::string getTestCaseName(testing::TestParamInfo obj) { + static std::string getTestCaseName(testing::TestParamInfo obj) { Precision netPrecision; std::vector inputShape, inputOrder; std::string targetDevice; @@ -72,11 +72,11 @@ protected: } }; -TEST_P(PermuteLayerCPUTest, CompareWithRefs) { +TEST_P(TransposeLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - 
CheckPluginRelatedResults(executableNetwork, "Permute"); + CheckPluginRelatedResults(executableNetwork, "Transpose"); } namespace { @@ -138,7 +138,7 @@ const auto params4D = ::testing::Combine( ::testing::Values(additional_config), ::testing::ValuesIn(CPUParams4D)); -INSTANTIATE_TEST_CASE_P(smoke_Permute4D_CPU, PermuteLayerCPUTest, params4D, PermuteLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Transpose4D_CPU, TransposeLayerCPUTest, params4D, TransposeLayerCPUTest::getTestCaseName); const auto paramsPerChannels4D = ::testing::Combine( ::testing::ValuesIn(inputOrderPerChannels4D), @@ -148,7 +148,7 @@ const auto paramsPerChannels4D = ::testing::Combine( ::testing::Values(additional_config), ::testing::Values(cpuParams_nhwc)); -INSTANTIATE_TEST_CASE_P(smoke_PermutePerChannels4D_CPU, PermuteLayerCPUTest, paramsPerChannels4D, PermuteLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_PermutePerChannels4D_CPU, TransposeLayerCPUTest, paramsPerChannels4D, TransposeLayerCPUTest::getTestCaseName); const std::vector> inputShapes5D = { {2, 32, 5, 10, 20} @@ -191,7 +191,7 @@ const auto params5D = ::testing::Combine( ::testing::Values(additional_config), ::testing::ValuesIn(CPUParams5D)); -INSTANTIATE_TEST_CASE_P(smoke_Permute5D_CPU, PermuteLayerCPUTest, params5D, PermuteLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Transpose5D_CPU, TransposeLayerCPUTest, params5D, TransposeLayerCPUTest::getTestCaseName); const auto paramsPerChannels5D = ::testing::Combine( ::testing::ValuesIn(inputOrderPerChannels5D), @@ -201,7 +201,7 @@ const auto paramsPerChannels5D = ::testing::Combine( ::testing::Values(additional_config), ::testing::Values(cpuParams_ndhwc)); -INSTANTIATE_TEST_CASE_P(smoke_PermutePerChannels5D_CPU, PermuteLayerCPUTest, paramsPerChannels5D, PermuteLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_PermutePerChannels5D_CPU, TransposeLayerCPUTest, paramsPerChannels5D, TransposeLayerCPUTest::getTestCaseName); } // namespace } // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/conv_concat.hpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/conv_concat.hpp index d4a89cb0e99..9f034105853 100644 --- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/conv_concat.hpp +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/conv_concat.hpp @@ -17,31 +17,6 @@ using namespace CPUTestUtils; namespace SubgraphTestsDefinitions { -enum class nodeType { - convolution, - convolutionBackpropData, - groupConvolution, - groupConvolutionBackpropData -}; - -std::string nodeType2PluginType(nodeType nt) { - if (nt == nodeType::convolution) return "Convolution"; - if (nt == nodeType::convolutionBackpropData) return "Deconvolution"; - if (nt == nodeType::groupConvolution) return "Convolution"; - if (nt == nodeType::groupConvolutionBackpropData) return "Deconvolution"; - assert(!"unknown node type"); - return "undef"; -} - -std::string nodeType2str(nodeType nt) { - if (nt == nodeType::convolution) return "Convolution"; - if (nt == nodeType::convolutionBackpropData) return "ConvolutionBackpropData"; - if (nt == nodeType::groupConvolution) return "GroupConvolution"; - if (nt == nodeType::groupConvolutionBackpropData) return "GroupConvolutionBackpropData"; - assert(!"unknown node type"); - return "undef"; -} - using commonConvParams = std::tuple< InferenceEngine::SizeVector, // Kernel size InferenceEngine::SizeVector, // Strides diff --git 
a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_transpose_reorder.hpp similarity index 69% rename from inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp rename to inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_transpose_reorder.hpp index 2d6fcce3aa4..e719b450ad4 100644 --- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_permute_reorder.hpp +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/fuse_transpose_reorder.hpp @@ -17,31 +17,31 @@ using namespace CPUTestUtils; namespace SubgraphTestsDefinitions { -using FusePermuteAndReorderParams = std::tuple< +using FuseTransposeAndReorderParams = std::tuple< InferenceEngine::SizeVector, // Input shape InferenceEngine::Precision // Input precision >; -class FusePermuteAndReorderTest : public testing::WithParamInterface, public CPUTestsBase, +class FuseTransposeAndReorderTest : public testing::WithParamInterface, public CPUTestsBase, virtual public LayerTestsUtils::LayerTestsCommon { public: - static std::string getTestCaseName(testing::TestParamInfo obj); + static std::string getTestCaseName(testing::TestParamInfo obj); protected: void SetUp() override; virtual void CreateGraph(); - void CheckPermuteCount(size_t expectedPermuteCount); + void CheckTransposeCount(size_t expectedTransposeCount); InferenceEngine::SizeVector inputShape; InferenceEngine::Precision inPrec; }; -class FusePermuteAndReorderTest1 : public FusePermuteAndReorderTest { +class FuseTransposeAndReorderTest1 : public FuseTransposeAndReorderTest { protected: void CreateGraph() override; }; -class FusePermuteAndReorderTest2 : public FusePermuteAndReorderTest { +class FuseTransposeAndReorderTest2 : public FuseTransposeAndReorderTest { protected: void CreateGraph() override; }; diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp index 55dd27d564d..de0faf836d9 100644 --- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/add_conver_to_reorder.cpp @@ -65,9 +65,11 @@ namespace { */ TEST_F(AddConvertToReorderTest, smoke_TestAddConvert_CPU) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + BuildGraph(ngraph::element::bf16); Run(); - CheckNodeOfTypeCount(executableNetwork, "Convert", 1); + CheckNodeOfTypeCount(executableNetwork, "Convert", with_cpu_x86_avx512_core() ? 
1 : 0); CheckNodeOfTypeCount(executableNetwork, "Reorder", 0); } @@ -84,6 +86,8 @@ TEST_F(AddConvertToReorderTest, smoke_TestAddConvert_CPU) { Output[FP32] */ TEST_F(AddConvertToReorderTest, smoke_TestAddReorder_CPU) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + BuildGraph(ngraph::element::i8); Run(); CheckNodeOfTypeCount(executableNetwork, "Convert", 0); diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/conv3d_reshape.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/conv3d_reshape.cpp new file mode 100644 index 00000000000..80ff36aeeae --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/conv3d_reshape.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "test_utils/cpu_test_utils.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace SubgraphTestsDefinitions { + +using Conv3dReshapeTestParams = std::tuple; + +class Conv3dReshapeTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + nodeType conv; + size_t numOut; + std::tie(conv, numOut) = obj.param; + + std::ostringstream result; + result << nodeType2str(conv) << "_"; + result << "NUM_OUTPUTS=" << numOut; + + return result.str(); + } + +protected: + std::string cpuNodeType; + + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + nodeType convType; + size_t numOut; + std::tie(convType, numOut) = this->GetParam(); + + cpuNodeType = nodeType2PluginType(convType); + + auto inputParams = builder::makeParams(element::f32, {Shape{1, 1024, 64}}); + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); + + std::shared_ptr conv; + const std::vector kernelSize = {1}; + const std::vector strides = {1}; + const std::vector padBegin = {0}; + const std::vector padEnd = {0}; + const std::vector dilation = {1}; + const size_t numOutChannels = 30; + const size_t numOfGroups = 2; + const op::PadType paddingType = op::PadType::EXPLICIT; + switch (convType) { + case nodeType::convolution : { + conv = builder::makeConvolution(paramOuts[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); + break; + } + case nodeType::groupConvolution : { + conv = builder::makeGroupConvolution(paramOuts[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels, + numOfGroups); + break; + } + default: { + throw std::runtime_error("Conv3dReshapeTest doesn't support this type of operation"); + } + } + + ResultVector results; + for (int i = 0; i < numOut; i++) { + auto mockNode = std::make_shared(conv->output(0), opset5::Constant::create(element::f32, Shape{1}, {1})); + results.push_back(std::make_shared(mockNode)); + } + + function = std::make_shared(results, inputParams, "Conv3dReshape"); + } +}; + +TEST_P(Conv3dReshapeTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); +} + +namespace { + +const std::vector convType = { nodeType::convolution, nodeType::groupConvolution }; +const std::vector numOut = { 1, 2, 5 }; +const auto conv3dReshapeParams = ::testing::Combine(::testing::ValuesIn(convType), + ::testing::ValuesIn(numOut)); + +INSTANTIATE_TEST_CASE_P(smoke_Conv3dReshapeTest, Conv3dReshapeTest, conv3dReshapeParams, 
Conv3dReshapeTest::getTestCaseName); + +} // namespace + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/conv_maxpool_activ.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/conv_maxpool_activ.cpp new file mode 100644 index 00000000000..632fc25d1dd --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/conv_maxpool_activ.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/fusing_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace SubgraphTestsDefinitions { + +using ConvPoolActivTestParams = fusingSpecificParams; + +class ConvPoolActivTest : public testing::WithParamInterface, public CpuTestWithFusing, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + fusingSpecificParams fusingParams = obj.param; + + std::ostringstream result; + result << "ConvPoolActivTest"; + result << CpuTestWithFusing::getTestCaseName(fusingParams); + + return result.str(); + } + +protected: + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + fusingSpecificParams fusingParams = this->GetParam(); + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + + auto inputParams = builder::makeParams(element::f32, {Shape{1, 3, 40, 40}}); + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); + + std::shared_ptr conv; + { + const std::vector kernelSize = {3, 3}; + const std::vector strides = {2, 1}; + const std::vector padBegin = {0, 0}; + const std::vector padEnd = {0, 0}; + const std::vector dilation = {1, 1}; + const size_t numOutChannels = 16; + const op::PadType paddingType = op::PadType::EXPLICIT; + conv = builder::makeConvolution(paramOuts[0], element::f32, kernelSize, strides, padBegin, padEnd, dilation, paddingType, numOutChannels); + } + std::shared_ptr pooling; + { + const std::vector kernelSize = {3, 3}; + const std::vector strides = {1, 1}; + const std::vector padBegin = {0, 0}; + const std::vector padEnd = {0, 0}; + const op::PadType paddingType = op::PadType::EXPLICIT; + ngraph::helpers::PoolingTypes poolType = ngraph::helpers::PoolingTypes::MAX; + ngraph::op::RoundingType roundingType = ngraph::op::RoundingType::CEIL; + pooling = builder::makePooling(conv, strides, padBegin, padEnd, kernelSize, roundingType, paddingType, false, poolType); + } + + function = makeNgraphFunction(element::f32, inputParams, pooling, "ConvPoolActiv"); + } +}; + +TEST_P(ConvPoolActivTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckFusingResults(executableNetwork, "Convolution"); +} + +namespace { + +const std::vector fusingParamsSet { + emptyFusingSpec, + fusingRelu, + fusingSwish, + fusingSigmoid +}; + +INSTANTIATE_TEST_CASE_P(smoke_Check, ConvPoolActivTest, ::testing::ValuesIn(fusingParamsSet), ConvPoolActivTest::getTestCaseName); + +} // namespace + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp similarity index 62% rename from inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp rename to 
inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp index 16f5e680229..6cefb1b5be8 100644 --- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "subgraph_tests/include/fuse_permute_reorder.hpp" +#include "subgraph_tests/include/fuse_transpose_reorder.hpp" using namespace InferenceEngine; using namespace CPUTestUtils; namespace SubgraphTestsDefinitions { -std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfo obj) { +std::string FuseTransposeAndReorderTest::getTestCaseName(testing::TestParamInfo obj) { std::ostringstream result; SizeVector inputShape; Precision inPrec; @@ -21,11 +21,11 @@ std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfoget_ops()) { const auto & rtInfo = node->get_rt_info(); auto getExecValue = [&rtInfo](const std::string & paramName) -> std::string { @@ -35,34 +35,34 @@ void FusePermuteAndReorderTest::CheckPermuteCount(size_t expectedPermuteCount) { IE_ASSERT(nullptr != value); return value->get(); }; - if (getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == "Permute") { - actualPermuteCount++; + if (getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == "Transpose") { + actualTransposeCount++; } } - ASSERT_EQ(expectedPermuteCount, actualPermuteCount); + ASSERT_EQ(expectedTransposeCount, actualTransposeCount); } -void FusePermuteAndReorderTest::SetUp() { +void FuseTransposeAndReorderTest::SetUp() { targetDevice = CommonTestUtils::DEVICE_CPU; std::tie(inputShape, inPrec) = this->GetParam(); CreateGraph(); } -const auto fusePermuteAndReorderCommonParams = ::testing::Combine( +const auto fuseTransposeAndReorderCommonParams = ::testing::Combine( ::testing::Values(SizeVector{1, 2, 3, 4}, SizeVector{1, 2, 3, 4, 5}), ::testing::Values(Precision::I8, Precision::U8) ); -/* FusePermuteAndReorderTest graph +/* FuseTransposeAndReorderTest graph --------- |Input | --------- | ------------- | --------- | - | |Permute| | + | |Transpose| | | --------- | | | | | --------- | @@ -75,7 +75,7 @@ const auto fusePermuteAndReorderCommonParams = ::testing::Combine( --------- */ -void FusePermuteAndReorderTest::CreateGraph() { +void FuseTransposeAndReorderTest::CreateGraph() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); @@ -83,37 +83,37 @@ void FusePermuteAndReorderTest::CreateGraph() { auto memFmt = inputShape.size() == 5 ? 
ndhwc : nhwc; auto constOrder = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute = std::make_shared(params[0], constOrder); - permute->get_rt_info() = makeCPUInfo({memFmt}, {memFmt}, {}); + auto transpose = std::make_shared(params[0], constOrder); + transpose->get_rt_info() = makeCPUInfo({memFmt}, {memFmt}, {}); - ngraph::ResultVector results{std::make_shared(permute)}; - function = std::make_shared(results, params, "PermuteReorder"); + ngraph::ResultVector results{std::make_shared(transpose)}; + function = std::make_shared(results, params, "TransposeReorder"); } -TEST_P(FusePermuteAndReorderTest, CompareWithRefs) { +TEST_P(FuseTransposeAndReorderTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckPermuteCount(0); + CheckTransposeCount(0); } -INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndReorderCommonParams, FusePermuteAndReorderTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Basic, FuseTransposeAndReorderTest, fuseTransposeAndReorderCommonParams, FuseTransposeAndReorderTest::getTestCaseName); -/* FusePermuteAndReorderTest1 graph +/* FuseTransposeAndReorderTest1 graph --------- |Input | --------- | --------- - |Permute| + |Transpose| --------- | ------------------- | | | ------------- | | --------- | - | | |Permute| | + | | |Transpose| | --------- | --------- | |Reshape| | | | --------- | --------- | @@ -122,7 +122,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndRe | |-----------| | | | --------- - | |Permute| + | |Transpose| | --------- | | -------- -------- @@ -136,60 +136,60 @@ INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndRe --------- */ -void FusePermuteAndReorderTest1::CreateGraph() { +void FuseTransposeAndReorderTest1::CreateGraph() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); auto order = inputShape.size() == 5 ? std::vector{0, 2, 3, 4, 1} : std::vector{0, 2, 3, 1}; auto constOrder1 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute1 = std::make_shared(params[0], constOrder1); + auto transpose1 = std::make_shared(params[0], constOrder1); auto memFmt1 = inputShape.size() == 5 ? ndhwc : nhwc; - permute1->get_rt_info() = makeCPUInfo({memFmt1}, {memFmt1}, {}); + transpose1->get_rt_info() = makeCPUInfo({memFmt1}, {memFmt1}, {}); auto constOrder2 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute2 = std::make_shared(permute1, constOrder2); + auto transpose2 = std::make_shared(transpose1, constOrder2); auto memFmt2 = inputShape.size() == 5 ? ndhwc : nhwc; - permute2->get_rt_info() = makeCPUInfo({memFmt2}, {memFmt2}, {}); + transpose2->get_rt_info() = makeCPUInfo({memFmt2}, {memFmt2}, {}); auto constOrder3 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute3 = std::make_shared(permute2, constOrder3); + auto transpose3 = std::make_shared(transpose2, constOrder3); auto memFmt3 = inputShape.size() == 5 ? 
ncdhw : nchw; - permute3->get_rt_info() = makeCPUInfo({memFmt3}, {memFmt3}, {}); + transpose3->get_rt_info() = makeCPUInfo({memFmt3}, {memFmt3}, {}); - auto shape = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, permute3->get_output_shape(0)); - auto reshape = std::make_shared(permute1, shape, false); + auto shape = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, transpose3->get_output_shape(0)); + auto reshape = std::make_shared(transpose1, shape, false); - auto concat = ngraph::builder::makeConcat({permute3, reshape}, 1); + auto concat = ngraph::builder::makeConcat({transpose3, reshape}, 1); ngraph::ResultVector results{std::make_shared(concat)}; - function = std::make_shared(results, params, "Permute_PermuteReorderPermute_Reshape_Concat"); + function = std::make_shared(results, params, "Transpose_TransposeReorderTranspose_Reshape_Concat"); } -TEST_P(FusePermuteAndReorderTest1, CompareWithRefs) { +TEST_P(FuseTransposeAndReorderTest1, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckPermuteCount(2); + CheckTransposeCount(2); } -INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest1, fusePermuteAndReorderCommonParams, FusePermuteAndReorderTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Basic, FuseTransposeAndReorderTest1, fuseTransposeAndReorderCommonParams, FuseTransposeAndReorderTest::getTestCaseName); -/* FusePermuteAndReorderTest2 graph +/* FuseTransposeAndReorderTest2 graph --------- --------- |Input | |Input | --------- --------- | | | ------------- - --------- | --------- | - |Reorder| | |Permute| | - --------- | --------- | - | | | | - --------- | --------- | - |Permute| | |Reorder| | - --------- | --------- | - | |-----------| + --------- | ----------- | + |Reorder| | |Transpose| | + --------- | ----------- | + | | | | + --------- | ----------- | + |Transpose| | |Reorder| | + --------- | ----------- | + | |-------------| | | -------- -------- | | @@ -202,7 +202,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest1, fusePermuteAndR --------- */ -void FusePermuteAndReorderTest2::CreateGraph() { +void FuseTransposeAndReorderTest2::CreateGraph() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); auto inputShape2(inputShape); @@ -212,28 +212,28 @@ void FusePermuteAndReorderTest2::CreateGraph() { auto order = inputShape.size() == 5 ? std::vector{0, 4, 1, 2, 3} : std::vector{0, 3, 1, 2}; auto constOrder1 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute1 = std::make_shared(params[0], constOrder1); + auto transpose1 = std::make_shared(params[0], constOrder1); auto memFmt1 = inputShape.size() == 5 ? ndhwc : nhwc; - permute1->get_rt_info() = makeCPUInfo({memFmt1}, {memFmt1}, {}); + transpose1->get_rt_info() = makeCPUInfo({memFmt1}, {memFmt1}, {}); auto constOrder2 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute2 = std::make_shared(params[1], constOrder2); + auto transpose2 = std::make_shared(params[1], constOrder2); auto memFmt2 = inputShape.size() == 5 ? 
ncdhw : nchw; - permute2->get_rt_info() = makeCPUInfo({memFmt2}, {memFmt2}, {}); + transpose2->get_rt_info() = makeCPUInfo({memFmt2}, {memFmt2}, {}); - auto concat = ngraph::builder::makeConcat({permute1, permute2}, 1); + auto concat = ngraph::builder::makeConcat({transpose1, transpose2}, 1); ngraph::ResultVector results{std::make_shared(concat)}; - function = std::make_shared(results, params, "Permute_Permute_Concat"); + function = std::make_shared(results, params, "Transpose_Transpose_Concat"); } -TEST_P(FusePermuteAndReorderTest2, CompareWithRefs) { +TEST_P(FuseTransposeAndReorderTest2, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckPermuteCount(1); + CheckTransposeCount(1); } -INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest2, fusePermuteAndReorderCommonParams, FusePermuteAndReorderTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Basic, FuseTransposeAndReorderTest2, fuseTransposeAndReorderCommonParams, FuseTransposeAndReorderTest::getTestCaseName); } // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp new file mode 100644 index 00000000000..1ed41cb5b2b --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils/fusing_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace SubgraphTestsDefinitions { + +using ReshapeFCTestParams = std::tuple, // IS fully connected + bool, // transpose B + fusingSpecificParams>; + +class ReshapeFCTest : public testing::WithParamInterface, public CpuTestWithFusing, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::pair isFc; + bool transpB; + fusingSpecificParams fusingParams; + std::tie(isFc, transpB, fusingParams) = obj.param; + SizeVector isA = isFc.first; SizeVector isB = isFc.second; + + std::ostringstream result; + result << "IS_reshape=" << CommonTestUtils::vec2str(isA) << "_"; + result << "IS_fc_B=" << CommonTestUtils::vec2str(isB) << "_"; + result << "Transp_B=" << transpB; + result << CpuTestWithFusing::getTestCaseName(fusingParams); + + return result.str(); + } + +protected: + void SetUp() override { + targetDevice = CommonTestUtils::DEVICE_CPU; + std::pair isFc; + bool transpB; + fusingSpecificParams fusingParams; + std::tie(isFc, transpB, fusingParams) = this->GetParam(); + std::tie(postOpMgrPtr, fusedOps) = fusingParams; + SizeVector isReshape = isFc.first; SizeVector isB = isFc.second; + SizeVector isA(2); + isA[0] = isReshape[0]; + isA[1] = std::accumulate(isReshape.begin() + 1, isReshape.end(), size_t{1}, std::multiplies()); + if (transpB) { + std::swap(*(isB.end() - 1), *(isB.end() - 2)); + } + + auto inputParams = builder::makeParams(element::f32, {isReshape}); + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); + + auto constNode = builder::makeConstant(element::i64, {isA.size()}, isA); + auto reshape = std::make_shared(paramOuts[0], constNode, true); + + auto matrixB = builder::makeConstant(element::f32, isB, {}, true); + auto matMul = builder::makeMatMul(reshape, matrixB, false, transpB); + + function = makeNgraphFunction(element::f32, inputParams, 
matMul, "ReshapeFC"); + } +}; + +TEST_P(ReshapeFCTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckNodeOfTypeCount(executableNetwork, "Reshape", 0); + CheckFusingResults(executableNetwork, "FullyConnected"); +} + +namespace { + +const std::vector transpose = { + true, false +}; + +const std::vector> isFC = { + {{71, 128, 1, 1}, {128, 20}}, + {{1, 24, 2, 7}, {336, 16}} +}; + +std::vector fusingParamsSet { + emptyFusingSpec, + fusingAddPerChannel +}; + +const auto reshapeFCParams = ::testing::Combine(::testing::ValuesIn(isFC), + ::testing::ValuesIn(transpose), + ::testing::ValuesIn(fusingParamsSet)); + +INSTANTIATE_TEST_CASE_P(smoke_Check, ReshapeFCTest, reshapeFCParams, ReshapeFCTest::getTestCaseName); + +} // namespace + +} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp index b1595d8c1ce..066aae85e7c 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp @@ -83,6 +83,29 @@ namespace CPUTestUtils { std::string // selected primitive type >; + enum class nodeType { + convolution, + convolutionBackpropData, + groupConvolution, + groupConvolutionBackpropData + }; + + inline std::string nodeType2PluginType(nodeType nt) { + if (nt == nodeType::convolution) return "Convolution"; + if (nt == nodeType::convolutionBackpropData) return "Deconvolution"; + if (nt == nodeType::groupConvolution) return "Convolution"; + if (nt == nodeType::groupConvolutionBackpropData) return "Deconvolution"; + throw std::runtime_error("Undefined node type to convert to plug-in type node!"); + } + + inline std::string nodeType2str(nodeType nt) { + if (nt == nodeType::convolution) return "Convolution"; + if (nt == nodeType::convolutionBackpropData) return "ConvolutionBackpropData"; + if (nt == nodeType::groupConvolution) return "GroupConvolution"; + if (nt == nodeType::groupConvolutionBackpropData) return "GroupConvolutionBackpropData"; + throw std::runtime_error("Undefined node type to convert to string!"); + } + class CPUTestsBase { public: typedef std::map> CPUInfo; diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp index 52f235c7821..6f24854affb 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp @@ -36,11 +36,11 @@ CpuTestWithFusing::modifyGraph(const ngraph::element::Type &ngPrc, ngraph::Param return retNode; } -void CpuTestWithFusing::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const { - CPUTestsBase::CheckPluginRelatedResults(execNet, nodeType); +void CpuTestWithFusing::CheckFusingResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const { InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo(); auto function = execGraphInfo.getFunction(); ASSERT_NE(nullptr, function); + bool isNodeFound = false; for (const auto & op : function->get_ops()) { const auto &rtInfo = op->get_rt_info(); @@ -55,6 +55,7 @@ void CpuTestWithFusing::CheckPluginRelatedResults(InferenceEngine::ExecutableNet auto layerType = getExecValue("layerType", rtInfo); if (layerType == nodeType) { + isNodeFound = true; auto 
originalLayersNames = getExecValue("originalLayersNames", rtInfo); std::string opFriendlyName = op->get_friendly_name(); auto pos = originalLayersNames.find(opFriendlyName); @@ -65,6 +66,12 @@ void CpuTestWithFusing::CheckPluginRelatedResults(InferenceEngine::ExecutableNet } } } + ASSERT_TRUE(isNodeFound) << "Node type name: \"" << nodeType << "\" has not been found."; +} + +void CpuTestWithFusing::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const { + CPUTestsBase::CheckPluginRelatedResults(execNet, nodeType); + CheckFusingResults(execNet, nodeType); } std::shared_ptr diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp index b084dacbd16..2483c74e847 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp @@ -65,6 +65,7 @@ protected: const std::shared_ptr &lastNode) const override; void CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const override; + void CheckFusingResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const; protected: std::shared_ptr postOpMgrPtr; @@ -74,23 +75,48 @@ protected: /* FUSING PATTERNS */ const auto emptyFusingSpec = fusingSpecificParams{nullptr, {}}; + const auto fusingRelu = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); }, "Relu"}}), {"Relu"}}; + const auto fusingElu = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Elu, {}, {2.0f}); }, "Elu"}}), {"Elu"}}; + +const auto fusingGelu = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Gelu); + }, "Gelu"}}), {"Gelu"}}; + const auto fusingSigmoid = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Sigmoid); }, "Sigmoid"}}), {"Sigmoid"}}; + const auto fusingClamp = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Clamp, {}, {3.0f, 6.0f}); }, "Clamp"}}), {"Clamp"}}; -const auto fusingPRelu = fusingSpecificParams{std::make_shared(std::vector{ + +const auto fusingTanh = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Tanh); + }, "Tanh"}}), {"Tanh"}}; + +const auto fusingAbs = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Abs); + }, "Abs"}}), {"Abs"}}; + 
+const auto fusingSqrt = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Sqrt); + }, "Sqrt"}}), {"Sqrt"}}; + +const auto fusingPReluPerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto shape = inpNode->get_shape(); if (shape.size() == 1) @@ -100,26 +126,53 @@ const auto fusingPRelu = fusingSpecificParams{std::make_shared(std auto data = NGraphFunctions::Utils::generateVector(ngraph::shape_size(newShape)); return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, newShape, data); }, "PRelu(PerChannel)"}}), {"PRelu"}}; + +const auto fusingPReluPerTensor = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape shape(1, 1); + auto data = NGraphFunctions::Utils::generateVector(ngraph::shape_size(shape)); + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, shape, data); + }, "PRelu(PerTensor)"}}), {"PRelu"}}; + const auto fusingSwish = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Swish, {}, {1.0f}); }, "Swish"}}), {"Swish"}}; -const auto fusingHSwish = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::HSwish, {}, {}); - }, "HSwish"}}), {"HSwish"}}; -const auto fusingMish = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Mish, {}, {}); - }, "Mish"}}), {"Mish"}}; + const auto fusingSoftPlus = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::SoftPlus, {}, {}); }, "SoftPlus"}}), {"SoftPlus"}}; -const auto fusingTanh = fusingSpecificParams{std::make_shared(std::vector{ + +const auto fusingHSwish = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Tanh, {}, {}); - }, "Tanh"}}), {"Tanh"}}; + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::HSwish, {}, {}); + }, "HSwish"}}), {"HSwish"}}; + +const auto fusingMish = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Mish, {}, {}); + }, "Mish"}}), {"Mish"}}; + +const auto fusingHSigmoid = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::HSigmoid); + }, "HSigmoid"}}), 
{"HSigmoid"}}; + +const auto fusingReluAdd = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); + }, "Relu"}, + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + auto shape = inpNode->get_shape(); + if (shape.size() == 1) + THROW_IE_EXCEPTION << "If shape.size() == 1 then Granularity can be PerTensor only"; + ngraph::Shape newShape(shape.size(), 1); + newShape[1] = shape[1]; + auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); + return std::make_shared(inpNode, constNode); + }, "Add(PerChannel)"}}), {"Relu", "Add"}}; + const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); @@ -130,7 +183,7 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared(ngraph::element::f32, newShape, {}, true); + auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); return std::make_shared(inpNode, constNode); }, "Multiply(PerChannel)"}, {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ @@ -139,9 +192,10 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared(ngraph::element::f32, newShape, {}, true); + auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); return std::make_shared(inpNode, constNode); }, "Add(PerChannel)"}}), {"Relu", "Add"}}; + const auto fusingScaleShift = fusingSpecificParams{ std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { auto shape = inpNode->get_shape(); @@ -149,7 +203,7 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared(ngraph::element::f32, newShape, {}, true); + auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); return std::make_shared(inpNode, constNode); }, "Multiply(PerChannel)"}, {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { @@ -158,9 +212,10 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared(ngraph::element::f32, newShape, {}, true); + auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); return std::make_shared(inpNode, constNode); }, "Add(PerChannel)"}}), {"Add"} }; + const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto localPrc = inpNode->get_element_type(); @@ -171,6 +226,7 @@ const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared< newShape[1] = shape[1]; return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"}}; + const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto localPrc = inpNode->get_element_type(); @@ -184,6 +240,7 @@ const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_sha {[](std::shared_ptr inpNode, const 
ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); }, "Relu"}}), {"FakeQuantize", "Relu"}}; + const auto fusingFakeQuantizePerTensorRelu = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { auto localPrc = inpNode->get_element_type(); @@ -193,6 +250,7 @@ const auto fusingFakeQuantizePerTensorRelu = fusingSpecificParams{std::make_shar {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); }, "Relu"}}), {"FakeQuantize", "Relu"}}; + const auto fusingSum = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto shape = inpNode->get_shape(); @@ -202,6 +260,7 @@ const auto fusingSum = fusingSpecificParams{std::make_shared(std:: ngraph::helpers::castOps2Nodes(newParams)); return std::make_shared(inpNode, newParamOuts[0]); }, "Add(Parameters)"}}), {"Add"}}; + const auto fusingSumEluFQ = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto shape = inpNode->get_shape(); @@ -219,4 +278,65 @@ const auto fusingSumEluFQ = fusingSpecificParams{std::make_shared( auto newShape = ngraph::Shape(inpNode->get_shape().size(), 1); return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); }, "FakeQuantize(PerTensor)"}}), {"Add", "Elu", "FakeQuantize"}}; + +const auto fusingMultiplyPerTensor = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape secondMultInShape(1, 1); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, "Multiply(PerTensor)"}}), {"Multiply"}}; + +const auto fusingMultiplyPerChannel = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape secondMultInShape(inpNode->get_shape().size(), 1); + secondMultInShape[1] = inpNode->get_shape()[1]; + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, "Multiply(PerChannel)"}}), {"Multiply"}}; + +const auto fusingAddPerTensor = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape secondMultInShape(1, 1); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, "Add(PerTensor)"}}), {"Add"}}; + +const auto fusingAddPerChannel = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape secondMultInShape(inpNode->get_shape().size(), 1); + secondMultInShape[1] = inpNode->get_shape()[1]; + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, 
secondMultInput); + }, "Add(PerChannel)"}}), {"Add"}}; + +const auto fusingSubtractPerTensor = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape secondMultInShape(1, 1); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, "Subtract(PerTensor)"}}), {"Subtract"}}; + +const auto fusingSubtractPerChannel = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape secondMultInShape(inpNode->get_shape().size(), 1); + secondMultInShape[1] = inpNode->get_shape()[1]; + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, "Subtract(PerChannel)"}}), {"Subtract"}}; + +const auto fusingDividePerTensor = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape secondMultInShape(1, 1); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, "Divide(PerTensor)"}}), {"Divide"}}; + +const auto fusingDividePerChannel = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + ngraph::Shape secondMultInShape(inpNode->get_shape().size(), 1); + secondMultInShape[1] = inpNode->get_shape()[1]; + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + return std::make_shared(inpNode, secondMultInput); + }, "Divide(PerChannel)"}}), {"Divide"}}; + } // namespace CPUTestUtils diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp index 7e19ba866a5..ff326a2d0a4 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp @@ -93,16 +93,16 @@ const auto avgPoolExplicitPadCeilRoundingParams = ::testing::Combine( ); INSTANTIATE_TEST_CASE_P(smoke_AvgPool_ExplicitPad_CeilRounding, PoolingLayerTest, - ::testing::Combine( - avgPoolExplicitPadCeilRoundingParams, - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 30, 30})), - ::testing::Values(CommonTestUtils::DEVICE_GPU)), - PoolingLayerTest::getTestCaseName); + ::testing::Combine( + avgPoolExplicitPadCeilRoundingParams, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({1, 3, 30, 30})), + 
::testing::Values(CommonTestUtils::DEVICE_GPU)), + PoolingLayerTest::getTestCaseName); /* +========== Explicit Pad Floor Rounding ========== */ const auto avgPoolExplicitPadFloorRoundingParams = ::testing::Combine( diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/exec_graph_info.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/exec_graph_info.hpp index b663f548598..6c92a9e45fa 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/exec_graph_info.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/exec_graph_info.hpp @@ -72,7 +72,7 @@ TEST_P(ExecGraphTests, CheckExecGraphInfoBeforeExecution) { const auto originalLayers = function->get_ops(); std::map originalLayersMap; for (const auto &layer : originalLayers) { - if (layer->description() == "Result") + if (layer->description() == "Result" && targetDevice != CommonTestUtils::DEVICE_CPU) continue; originalLayersMap[layer->get_friendly_name()] = 0; } diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/set_blob.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/set_blob.hpp index aa9b4acc8b8..3692073d4e2 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/set_blob.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/set_blob.hpp @@ -19,6 +19,7 @@ enum class setType { std::ostream& operator<<(std::ostream & os, setType type); using SetBlobParams = std::tuple; // Device name @@ -31,7 +32,7 @@ protected: void SetUp() override; private: - InferenceEngine::Precision precision; + InferenceEngine::Precision precNet; setType type; }; diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/loop.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/loop.hpp index 6bb2133df74..6bf5319fe74 100644 --- a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/loop.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/loop.hpp @@ -10,10 +10,12 @@ namespace LayerTestsDefinitions { TEST_P(LoopTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); } TEST_P(StaticShapeLoopTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); } diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_concat_memory.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_concat_memory.hpp index f634a439114..dd855df365a 100644 --- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_concat_memory.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_concat_memory.hpp @@ -9,6 +9,8 @@ namespace SubgraphTestsDefinitions { TEST_P(SplitConcatMemory, cyclicBufferCorrectness) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + auto ie = PluginCache::get().ie(); cnnNetwork = InferenceEngine::CNNNetwork{function}; diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/add_output.cpp b/inference-engine/tests/functional/plugin/shared/src/behavior/add_output.cpp index 3935161957a..a90b2a6772a 100644 --- a/inference-engine/tests/functional/plugin/shared/src/behavior/add_output.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/behavior/add_output.cpp @@ -5,6 +5,7 @@ #include #include "behavior/add_output.hpp" #include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/skip_tests_config.hpp" std::string 
AddOutputsTest::getTestCaseName(const testing::TestParamInfo &obj) { std::ostringstream results; @@ -21,6 +22,8 @@ void AddOutputsTest::SetUp() { } TEST_P(AddOutputsTest, smoke_CheckOutputExist) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + std::vector expectedOutputs = outputsToAdd; for (const auto &out : net.getOutputsInfo()) { expectedOutputs.push_back(out.first); diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/invalid_cases/proposal.cpp b/inference-engine/tests/functional/plugin/shared/src/behavior/invalid_cases/proposal.cpp index a3484332add..67b6d340122 100644 --- a/inference-engine/tests/functional/plugin/shared/src/behavior/invalid_cases/proposal.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/behavior/invalid_cases/proposal.cpp @@ -100,5 +100,7 @@ void ProposalBehTest::Run() { } TEST_P(ProposalBehTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + ASSERT_THROW(Run(), InferenceEngine::Exception); } diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/set_blob.cpp b/inference-engine/tests/functional/plugin/shared/src/behavior/set_blob.cpp index fb20d01fefa..5f2c8ff7806 100644 --- a/inference-engine/tests/functional/plugin/shared/src/behavior/set_blob.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/behavior/set_blob.cpp @@ -27,15 +27,16 @@ std::ostream& operator<<(std::ostream & os, setType type) { } std::string SetBlobTest::getTestCaseName(testing::TestParamInfo obj) { - Precision prec; + Precision precNet, precNg; setType type; std::string targetDevice; - std::tie(prec, type, targetDevice) = obj.param; + std::tie(precNet, precNg, type, targetDevice) = obj.param; std::ostringstream result; - result << "Type="<< type; - result << " Device="<< targetDevice; - result << " Precision=" << prec; + result << "Type=" << type << "_"; + result << "Device=" << targetDevice << "_"; + result << "PrecisionInNet=" << precNet << "_"; + result << "PrecisionInNgraph=" << precNg; return result.str(); } @@ -65,7 +66,7 @@ void SetBlobTest::Infer() { const auto &info = input.second; Blob::Ptr inBlob; if (type == setType::INPUT || type == setType::BOTH) { - inBlob = make_blob_with_precision(precision, info->getTensorDesc()); + inBlob = make_blob_with_precision(precNet, info->getTensorDesc()); inBlob->allocate(); fillBlob(inBlob); } else { @@ -78,7 +79,7 @@ void SetBlobTest::Infer() { if (type == setType::OUTPUT || type == setType::BOTH) { for (const auto &output : executableNetwork.GetOutputsInfo()) { const auto &info = output.second; - Blob::Ptr outBlob = make_blob_with_precision(precision, info->getTensorDesc()); + Blob::Ptr outBlob = make_blob_with_precision(precNet, info->getTensorDesc()); outBlob->allocate(); fillBlob(outBlob); inferRequest.SetBlob(info->getName(), outBlob); @@ -90,12 +91,15 @@ void SetBlobTest::Infer() { void SetBlobTest::SetUp() { SizeVector IS{4, 5, 6, 7}; - std::tie(precision, type, targetDevice) = this->GetParam(); + Precision precNg; + std::tie(precNet, precNg, type, targetDevice) = this->GetParam(); + if (type == setType::INPUT || type == setType::BOTH) + inPrc = precNet; if (type == setType::OUTPUT || type == setType::BOTH) - outPrc = precision; + outPrc = precNet; - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precision); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precNg); auto params = ngraph::builder::makeParams(ngPrc, {IS}); auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); auto axisNode = 
std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{}, std::vector{-1})->output(0); diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp index 3e6e737e7b9..60a785ac920 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp @@ -92,6 +92,8 @@ void MatMulTransformation::validate() { } void MatMulTransformation::Run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + LayerTestsCommon::Run(); const auto params = std::get<3>(GetParam()); diff --git a/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp b/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp index 96764070c5a..a83d6564c9e 100644 --- a/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp +++ b/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp @@ -28,7 +28,33 @@ TEST(MemDescTest, Conversion) { { dnnl::memory::format_tag::nhwc, {4, 2, 10, 7 } }, // permuted { dnnl::memory::format_tag::nchw, {4, 2, 10, 7 } }, // plain { dnnl::memory::format_tag::NChw16n16c, {4, 2, 10, 7 } }, // blocked for 2 dims - { dnnl::memory::format_tag::BAcd16a16b, {4, 2, 10, 7 } } // blocked and permuted outer dims + { dnnl::memory::format_tag::BAcd16a16b, {4, 2, 10, 7 } }, // blocked and permuted outer dims + { dnnl::memory::format_tag::Acdb16a, {96, 1, 7, 7 } }, // same strides but not default order + }; + + for (const auto &p : payload) + ASSERT_TRUE(converted_correctly(p.first, p.second)); +} + +TEST(MemDescTest, CompareWithTensorDescRecomputedStrides) { + auto converted_correctly = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) { + dnnl::memory::desc orig_tdesc {dims, dnnl::memory::data_type::u8, fmt}; + MKLDNNMemoryDesc plg_tdesc {orig_tdesc}; + TensorDesc ie_tdesc {plg_tdesc}; + + const BlockingDesc block_dess(ie_tdesc.getBlockingDesc().getBlockDims(), ie_tdesc.getBlockingDesc().getOrder()); + TensorDesc recomputed_tdesc(ie_tdesc.getPrecision(), ie_tdesc.getDims(), block_dess); + + return ie_tdesc == recomputed_tdesc; + }; + + std::pair payload[] { + { dnnl::memory::format_tag::nChw16c, {1, 1, 10, 10} }, // auto blocked + { dnnl::memory::format_tag::nhwc, {4, 2, 10, 7 } }, // permuted + { dnnl::memory::format_tag::nchw, {4, 2, 10, 7 } }, // plain + { dnnl::memory::format_tag::NChw16n16c, {4, 2, 10, 7 } }, // blocked for 2 dims + { dnnl::memory::format_tag::BAcd16a16b, {4, 2, 10, 7 } }, // blocked and permuted outer dims + { dnnl::memory::format_tag::Acdb16a, {96, 1, 7, 7 } }, // same strides but not default order }; for (const auto &p : payload) diff --git a/inference-engine/tests_deprecated/functional/mkldnn/config_param_test/config_param_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/config_param_test/config_param_test.cpp deleted file mode 100644 index 57a7a235ff4..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/config_param_test/config_param_test.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include -#include - -using namespace ::testing; -using namespace InferenceEngine; - -class smoke_PropertyTest : public TestsCommon, public TestsCommonFunc{}; - -TEST_F(smoke_PropertyTest, 
onSplitConvConcat) { - auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat({1, 4, 100, 100}); - - CNNNetwork net(fnPtr); - auto ieCore = PluginCache::get().ie(); - InferenceEngine::ExecutableNetwork exeNet = ieCore->LoadNetwork(net, CommonTestUtils::DEVICE_CPU); - InferenceEngine::InferRequest inferRequest0 = exeNet.CreateInferRequest(); - - auto blob0 = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc()); - - inferRequest0.SetBlob(net.getInputsInfo().begin()->first, blob0); - inferRequest0.Infer(); - float* outRawData = inferRequest0.GetBlob(net.getOutputsInfo().begin()->first)->cbuffer().as(); - - - exeNet = ieCore->LoadNetwork(net, CommonTestUtils::DEVICE_CPU, - {{PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, PluginConfigParams::CPU_THROUGHPUT_AUTO}}); - InferenceEngine::InferRequest inferRequest1 = exeNet.CreateInferRequest(); - - auto blob1 = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc()); - - inferRequest1.SetBlob(net.getInputsInfo().begin()->first, blob1); - inferRequest1.Infer(); - float* outRawDataWithConfig = inferRequest1.GetBlob(net.getOutputsInfo().begin()->first)->cbuffer().as(); - - float thr1, thr2; - FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32, thr1, thr2); - - size_t outElementsCount = std::accumulate(begin(fnPtr->get_output_shape(0)), end(fnPtr->get_output_shape(0)), 1, - std::multiplies()); - - FuncTestUtils::compareRawBuffers(outRawData, outRawDataWithConfig, outElementsCount, outElementsCount, - FuncTestUtils::CompareType::ABS_AND_REL, - thr1, thr2); -} diff --git a/inference-engine/tests_deprecated/functional/mkldnn/dummy.cpp b/inference-engine/tests_deprecated/functional/mkldnn/dummy.cpp new file mode 100644 index 00000000000..ffe853f7697 --- /dev/null +++ b/inference-engine/tests_deprecated/functional/mkldnn/dummy.cpp @@ -0,0 +1,4 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + diff --git a/inference-engine/tests_deprecated/functional/mkldnn/extensions_tests/extensions_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/extensions_tests/extensions_test.cpp deleted file mode 100644 index 79c8892152c..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/extensions_tests/extensions_test.cpp +++ /dev/null @@ -1,274 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace ::testing; -using namespace InferenceEngine; - -struct extension_params { - std::string pluginName; - std::shared_ptr extension; - std::string plugin() { return pluginName + "Plugin"; } - // optional config (used for multi-device) - std::map config; -}; - -class NewFakePrimitiveImpl : public InferenceEngine::ILayerExecImpl { -public: - NewFakePrimitiveImpl(const std::shared_ptr& node): node(node) {} - - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = true; - if (node->outputs().size() != 1 && node->inputs().size() != 1) - return InferenceEngine::GENERAL_ERROR; - InferenceEngine::DataConfig cfg; - cfg.constant = false; - cfg.inPlace = 0; - InferenceEngine::SizeVector order; - auto partialShape = node->get_output_partial_shape(0); - if (partialShape.is_dynamic()) - return InferenceEngine::GENERAL_ERROR; - auto shape = 
node->get_output_shape(0); - for(size_t i = 0; i < shape.size(); i++) { - order.push_back(i); - } - cfg.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, - shape, {shape, order}); - config.outConfs.push_back(cfg); - config.inConfs.push_back(cfg); - conf.push_back(config); - return InferenceEngine::OK; - } - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - return InferenceEngine::OK; - } - InferenceEngine::StatusCode execute(std::vector& inputs, std::vector& outputs, InferenceEngine::ResponseDesc *resp) noexcept override { - return InferenceEngine::OK; - } - -private: - const std::shared_ptr node; -}; - -class FakeTestOp: public ngraph::op::Op { -public: - static constexpr ngraph::NodeTypeInfo type_info{"Fake", 0}; - const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; } - - FakeTestOp() = default; - explicit FakeTestOp(const ngraph::Output& arg): Op({arg}) { - constructor_validate_and_infer_types(); - } - - void validate_and_infer_types() override { - auto input_shape = get_input_partial_shape(0).to_shape(); - - ngraph::Shape output_shape(input_shape); - for (int i = 0; i < input_shape.size(); ++i) { - output_shape[i] = input_shape[i]; - } - - set_output_type(0, get_input_element_type(0), ngraph::PartialShape(output_shape)); - } - - std::shared_ptr clone_with_new_inputs(const ngraph::OutputVector& new_args) const override { - if (new_args.size() != 1) { - throw ngraph::ngraph_error("Incorrect number of new arguments"); - } - - return std::make_shared(new_args.at(0)); - } - - bool visit_attributes(ngraph::AttributeVisitor& visitor) override { - return true; - } -}; - -constexpr ngraph::NodeTypeInfo FakeTestOp::type_info; - -class NewTestExtension : public InferenceEngine::IExtension { -public: - NewTestExtension() { - impls["Fake"] = [](const std::shared_ptr& node) -> InferenceEngine::ILayerImpl::Ptr { - return std::make_shared(node); - }; - } - - void GetVersion(const InferenceEngine::Version *&versionInfo) const noexcept override { - static const InferenceEngine::Version VERSION{{}, "", ""}; - versionInfo = &VERSION; - } - - void Unload() noexcept override {} - - std::vector getImplTypes(const std::shared_ptr& node) override { - if (impls.find(node->description()) == impls.end()) - return {}; - return {"CPU"}; - } - - InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr& node, const std::string& implType) override { - if (impls.find(node->description()) == impls.end() || implType != "CPU") - return nullptr; - return impls[node->description()](node); - } - - std::map getOpSets() override { - static std::map opsets; - if (opsets.empty()) { - ngraph::OpSet opset; - opset.insert(); - opsets["custom_opset"] = opset; - } - return opsets; - } -private: - std::map)>> impls; -}; - -class smoke_ExtensionTest : public TestsCommon, - public TestsCommonFunc { - -protected: - void checkExtensionRemoved(extension_params p) { - try { - std::unique_ptr score_engine; - score_engine.reset(new InferenceEnginePluginPtr(make_plugin_name(p.plugin()).c_str())); - (*score_engine)->SetConfig(p.config); - ASSERT_EQ(p.extension.use_count(), 2); - - (*score_engine)->AddExtension(p.extension); - // multi-device holds additional reference of the extension ptr - ASSERT_EQ(p.extension.use_count(), p.pluginName.find("Multi")==std::string::npos ? 
3 : 4); - score_engine.reset(); - - ASSERT_EQ(p.extension.use_count(), 2); - } catch (const InferenceEngine::Exception& e) { - FAIL() << e.what(); - } - } - void checkExtensionNotRemovedFromAnotherEngineObject(extension_params p) { - try { - std::unique_ptr score_engine1; - score_engine1.reset(new InferenceEnginePluginPtr(make_plugin_name(p.plugin()).c_str())); - (*score_engine1)->SetConfig(p.config); - - std::unique_ptr score_engine2; - score_engine2.reset(new InferenceEnginePluginPtr(make_plugin_name(p.plugin()).c_str())); - (*score_engine2)->SetConfig(p.config); - ASSERT_EQ(p.extension.use_count(), 2); - - (*score_engine1)->AddExtension(p.extension); - // multi-device holds additional reference of the extension ptr - ASSERT_EQ(p.extension.use_count(), p.pluginName.find("Multi")==std::string::npos ? 3 : 4); - score_engine2.reset(); - - // multi-device holds additional reference of the extension ptr - ASSERT_EQ(p.extension.use_count(), p.pluginName.find("Multi")==std::string::npos ? 3 : 4); - score_engine1.reset(); - ASSERT_EQ(p.extension.use_count(), 2); - } catch (const InferenceEngine::Exception& e) { - FAIL() << e.what(); - } - } - - void checkNotSharedExtensions(std::shared_ptr extension, std::string device) { - std::string model = R"V0G0N( - - - - - - - 1 - 3 - 5 - 5 - - - - - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - 1 - 3 - 5 - 5 - - - - - - - - - - )V0G0N"; - - try { - Core ie; - ie.AddExtension(extension, "CPU"); - Core ie2; - - Blob::Ptr weights; - CNNNetwork cnnNet1 = ie.ReadNetwork(model, weights); - ASSERT_NO_THROW(ie.LoadNetwork(cnnNet1, device)); - ASSERT_THROW(ie2.ReadNetwork(model, weights), InferenceEngine::Exception); - } catch (const InferenceEngine::Exception& e) { - FAIL() << e.what(); - } - } -}; - -#ifndef ENABLE_MKL_DNN - #include "disable_tests.hpp" -#endif - -TEST_F(smoke_ExtensionTest, MKLDNN_delete_extension) { - std::shared_ptr ext(new NewTestExtension()); - checkExtensionRemoved({"MKLDNN", ext}); -} - -TEST_F(smoke_ExtensionTest, MKLDNN_no_delete_extension_from_another_engine) { - std::shared_ptr ext(new NewTestExtension()); - checkExtensionNotRemovedFromAnotherEngineObject({"MKLDNN", ext}); -} - -TEST_F(smoke_ExtensionTest, MKLDNN_no_share_extension_between_engines) { - std::shared_ptr ext(new NewTestExtension()); - checkNotSharedExtensions(ext, "CPU"); -} - -TEST_F(smoke_ExtensionTest, MKLDNN_no_share_new_extension_between_engines) { - std::shared_ptr ext(new NewTestExtension()); - checkNotSharedExtensions(ext, "CPU"); -} diff --git a/inference-engine/tests_deprecated/functional/mkldnn/network_tests/ngraph_network_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/network_tests/ngraph_network_test.cpp deleted file mode 100644 index 5ae7de8ae7c..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/network_tests/ngraph_network_test.cpp +++ /dev/null @@ -1,359 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include - -#define XBYAK_NO_OP_NAMES -#define XBYAK_UNDEF_JNL - -using namespace ::testing; -using namespace InferenceEngine; - -struct ngraph_network_param { - std::string modelFile; - std::string imageName; - std::string ngraphModel; - - std::string model() { - ModelsPath result; - result += kPathSeparator; - result += modelFile; - return result; - } - - std::string weights() { - ModelsPath result; - result += kPathSeparator; - result += FileUtils::fileNameNoExt(modelFile); - result += ".bin"; - return result; - } - - 
std::string image() { - std::string result = TestDataHelpers::get_data_path(); - result += kPathSeparator; - result += imageName; - return result; - } - - std::string v7model() { - ModelsPath result; - result += kPathSeparator; - result += ngraphModel; - return result; - } -}; - -class smoke_NGraphNetworkTest : public TestsCommon, public TestsCommonFunc { -protected: - Blob::Ptr classifyV7(ngraph_network_param p, size_t batch_size = 1, float threshold = 0.005f) { - Core ie; - CNNNetwork network = ie.ReadNetwork(p.v7model()); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - - Blob::Ptr src = readInput(p.image(), batch_size); - - OutputsDataMap outInfo = network.getOutputsInfo(); - InputsDataMap inputInfo = network.getInputsInfo(); - - auto dst = make_shared_blob(outInfo.begin()->second->getTensorDesc()); - dst->allocate(); - inferRequest.SetBlob(inputInfo.begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - return dst; - } - - Blob::Ptr classifyV5(ngraph_network_param p, size_t batch_size = 1, float threshold = 0.005f) { - Core ie; - CNNNetwork network = ie.ReadNetwork(p.model(), p.weights()); - if (batch_size != 1) - network.setBatchSize(batch_size); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - - Blob::Ptr src = readInput(p.image(), batch_size); - - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - - auto dst = make_shared_blob(outInfo.begin()->second->getTensorDesc()); - dst->allocate(); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - return dst; - } - - void classify(ngraph_network_param p) { - try { - auto v7blb = classifyV7(p); - auto v5blb = classifyV5(p); - - auto* v7data = v7blb->buffer().as(); - auto* v5data = v5blb->buffer().as(); - - ASSERT_EQ(v7blb->size(), v5blb->size()); - for (size_t i = 0; i < v7blb->size(); i++) { - ASSERT_EQ(v7data[i], v5data[i]); - } - } catch (const InferenceEngine::Exception& e) { - FAIL() << e.what(); - } - } -}; - -/************************************************* - * !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! - * All ref values was obtained from Caffe scoring - * !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! 
- *************************************************/ -#ifndef ENABLE_MKL_DNN - #include "disable_tests.hpp" -#endif - -TEST_F(smoke_NGraphNetworkTest, reshapeLoadTest) { - std::string model = R"V0G0N( - - - - - - - - 1 - 1 - 28 - 28 - - - - - - - - 20 - 1 - 5 - 5 - - - - - - - - 1 - 1 - 28 - 28 - - - 20 - 1 - 5 - 5 - - - - - 1 - 20 - 24 - 24 - - - - - - - - 1 - 20 - 1 - 1 - - - - - - - 1 - 20 - 24 - 24 - - - 1 - 20 - 1 - 1 - - - - - 1 - 20 - 24 - 24 - - - - - - - - 1 - 20 - 24 - 24 - - - - - 1 - 20 - 12 - 12 - - - - - - - - 50 - 20 - 5 - 5 - - - - - - - - 1 - 20 - 12 - 12 - - - 50 - 20 - 5 - 5 - - - - - 1 - 50 - 8 - 8 - - - - - - - - 1 - 50 - 1 - 1 - - - - - - - 1 - 50 - 8 - 8 - - - 1 - 50 - 1 - 1 - - - - - 1 - 50 - 8 - 8 - - - - - - - - 1 - 50 - 8 - 8 - - - - - 1 - 50 - 4 - 4 - - - - - - - 1 - 50 - 4 - 4 - - - - - - - - - - - - - - - - - -)V0G0N"; - InferenceEngine::Blob::Ptr weights = make_shared_blob({InferenceEngine::Precision::U8, {1724336}, InferenceEngine::C}); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - std::map> shape; - shape["data"] = {1, 1, 28, 28}; - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, weights); - for (size_t i = 0; i < 10; i++) { - network.reshape(shape); - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - } -} - diff --git a/inference-engine/tests_deprecated/functional/mkldnn/regression_tests/regression_reference.cpp b/inference-engine/tests_deprecated/functional/mkldnn/regression_tests/regression_reference.cpp deleted file mode 100644 index 634cb9c7e71..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/regression_tests/regression_reference.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "regression_reference.hpp" - -namespace Regression { - namespace Reference { - - std::map> values = { - }; - } // namespace Reference -} // namespace Regression diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/common_single_layer_tests/single_layer_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/common_single_layer_tests/single_layer_tests.cpp deleted file mode 100644 index 69fc62c23da..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/common_single_layer_tests/single_layer_tests.cpp +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "single_layer_tests.hpp" - - -static CommonTestUtils::conv_common_params convParams = - { - PropertyVector{{2, 2}}, // stride - PropertyVector{{3, 3}}, // kernel - {}, // pad_begin - {}, // pad_end - PropertyVector{{1, 1}}, // dilation - "same_upper", // auto_pad - 1, // group - 2 // out_c - }; - -static CommonTestUtils::pool_common_params poolParams = - { - PropertyVector{{2, 2}}, // stride - PropertyVector{{3, 3}}, // kernel - {}, // pad_begin - {}, // pad_end - "same_upper", // auto_pad - true, // avg - false // exclude_pad - }; - -static CommonTestUtils::conv_common_params defConvParamsHeavy = - { - PropertyVector{{1, 1}}, // stride - PropertyVector{{3, 3}}, // kernel - {}, // pad_begin - {}, // pad_end - PropertyVector{{2, 2}}, // dilation - "same_upper", // auto_pad - 1, // group - 128 // out_c - }; - -static CommonTestUtils::conv_common_params defConvParamsLight0 = - { - PropertyVector{{1, 1}}, // stride - PropertyVector{{3, 3}}, // kernel - {}, // pad_begin - 
{}, // pad_end - PropertyVector{{2, 2}}, // dilation - "same_upper", // auto_pad - 1, // group - 4 // out_c - }; - -static CommonTestUtils::conv_common_params defConvParamsLight1 = - { - PropertyVector{{2, 2}}, // stride - PropertyVector{{3, 3}}, // kernel - {}, // pad_begin - {}, // pad_end - PropertyVector{{1, 1}}, // dilation - "same_upper", // auto_pad - 1, // group - 16 // out_c - }; - - -static CommonTestUtils::conv_common_params defConvParamsLight2 = - { - PropertyVector{{2, 2}}, // stride - PropertyVector{{3, 3}}, // kernel - {}, // pad_begin - {}, // pad_end - PropertyVector{{2, 2}}, // dilation - "same_upper", // auto_pad - 1, // group - 15 // out_c - }; - - -static CommonTestUtils::conv_common_params defConvParamsLight3 = - { - PropertyVector{{1, 1}}, // stride - PropertyVector{{3, 3}}, // kernel - {}, // pad_begin - {}, // pad_end - PropertyVector{{2, 2}}, // dilation - "same_upper", // auto_pad - 2, // group - 4 // out_c - }; - -static std::vector pluginParams = { - PluginDependentParam{"CPU", Layout::NCHW, Precision::FP32, 0.001f} -}; - -std::string -getTestCaseName(testing::TestParamInfo> obj) { - auto params = obj.param; - LayerTestHelper::Ptr helper = std::get<3>(params); - return "MKLDNN" + helper->getType(); -} - -INSTANTIATE_TEST_CASE_P( - // TODO: rewrite to ngraph to have reshape functionality - DISABLED_Conv_smoke, CommonSingleLayerTest, - ::testing::Combine( - ::testing::Values(InitialShapes({ - {{1, 2, 16, 16}}, // input - {{1, 2, 8, 8}} // output - })), - ::testing::Values(NewShapes({ - {{1, 2, 15, 15}}, // input - {{1, 2, 8, 8}} // output - })), - ::testing::ValuesIn(pluginParams), - ::testing::Values(Helper(std::make_shared(convParams))) -), getTestCaseName -); - -INSTANTIATE_TEST_CASE_P( - // TODO: rewrite to ngraph to have reshape functionality - DISABLED_Deconv_smoke, CommonSingleLayerTest, - ::testing::Combine( - ::testing::Values(InitialShapes({ - {{1, 2, 8, 8}}, // input - {{1, 2, 16, 16}} // output - })), - ::testing::Values(NewShapes({ - {{1, 2, 7, 7}}, // input - {{1, 2, 14, 14}} // output - })), - ::testing::ValuesIn(pluginParams), - ::testing::Values(Helper(std::make_shared(convParams))) -), getTestCaseName -); - -INSTANTIATE_TEST_CASE_P( - // TODO: rewrite to ngraph to have reshape functionality - DISABLED_Pool_smoke, CommonSingleLayerTest, - ::testing::Combine( - ::testing::Values(InitialShapes({ - {{1, 2, 16, 16}}, // input - {{1, 2, 8, 8}} // output - })), - ::testing::Values(NewShapes({ - {{1, 2, 15, 15}}, // input - {{1, 2, 8, 8}} // output - })), - ::testing::ValuesIn(pluginParams), - ::testing::Values(Helper(std::make_shared(poolParams))) -), getTestCaseName -); - -INSTANTIATE_TEST_CASE_P( - DISABLED_DefConvLight0_smoke, CommonSingleLayerTest, - ::testing::Combine( - ::testing::Values(InitialShapes({ - {{1, 4, 4, 4}, {1, 36, 4, 4}}, // input, trans - {{1, 4, 4, 4}} // output - })), - ::testing::Values(NewShapes({ - {{1, 4, 4, 4}, {1, 36, 4, 4}}, // input, trans - {{1, 4, 4, 4}} // output - })), - ::testing::ValuesIn(pluginParams), - ::testing::Values(Helper(std::make_shared(defConvParamsLight0, 2))) - ), getTestCaseName -); - -INSTANTIATE_TEST_CASE_P( - DISABLED_DefConvLight1_WithBatch_smoke, CommonSingleLayerTest, - ::testing::Combine( - ::testing::Values(InitialShapes({ - {{2, 4, 8, 8}, {2, 36, 4, 4}}, // input, trans - {{2, 16, 4, 4}} // output - })), - ::testing::Values(NewShapes({ - {{2, 4, 8, 8}, {2, 36, 4, 4}}, // input, trans - {{2, 16, 4, 4}} // output - })), - ::testing::ValuesIn(pluginParams), - 
::testing::Values(Helper(std::make_shared(defConvParamsLight1, 2))) - ), getTestCaseName -); - -INSTANTIATE_TEST_CASE_P( - DISABLED_DefConvLight2_WithBatch_smoke, CommonSingleLayerTest, - ::testing::Combine( - ::testing::Values(InitialShapes({ - {{2, 4, 8, 8}, {2, 18, 4, 4}}, // input, trans - {{2, 15, 4, 4}} // output - })), - ::testing::Values(NewShapes({ - {{2, 4, 8, 8}, {2, 18, 4, 4}}, // input, trans - {{2, 15, 4, 4}} // output - })), - ::testing::ValuesIn(pluginParams), - ::testing::Values(Helper(std::make_shared(defConvParamsLight2, 1))) - ), getTestCaseName -); - -INSTANTIATE_TEST_CASE_P( - DISABLED_DefConvLight3_WithGroups_smoke, CommonSingleLayerTest, - ::testing::Combine( - ::testing::Values(InitialShapes({ - {{1, 4, 4, 4}, {1, 18, 4, 4}}, // input, trans - {{1, 4, 4, 4}} // output - })), - ::testing::Values(NewShapes({ - {{1, 4, 4, 4}, {1, 18, 4, 4}}, // input, trans - {{1, 4, 4, 4}} // output - })), - ::testing::ValuesIn(pluginParams), - ::testing::Values(Helper(std::make_shared(defConvParamsLight3, 1))) - ), getTestCaseName -); - -INSTANTIATE_TEST_CASE_P( - DISABLED_smoke_DefConvHeavy, CommonSingleLayerTest, - ::testing::Combine( - ::testing::Values(InitialShapes({ - {{1, 512, 38, 38}, {1, 72, 38, 38}}, // input, trans - {{1, 128, 38, 38}} // output - })), - ::testing::Values(NewShapes({ - {{1, 512, 38, 38}, {1, 72, 38, 38}}, // input, trans - {{1, 128, 38, 38}} // output - })), - ::testing::ValuesIn(pluginParams), - ::testing::Values(Helper(std::make_shared(defConvParamsHeavy, 4))) - ), getTestCaseName -); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/graph_tools/graph_tools_functional_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/graph_tools/graph_tools_functional_tests.cpp deleted file mode 100644 index 2863bdbea27..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/graph_tools/graph_tools_functional_tests.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include "graph_tools_functional_tests.hpp" -#include - -using namespace testing; -using namespace InferenceEngine::details; -using namespace InferenceEngine; -using namespace std; - -TEST_F(GraphToolsFncTest, smoke_canSortSplitConvConcat) { - CNNNetwork network(ngraph::builder::subgraph::makeSplitConvConcat()); - checkSort(CNNNetSortTopologically(network)); -} - - -TEST_F(GraphToolsFncTest, smoke_canSortTIwithLstm) { - CNNNetwork network(ngraph::builder::subgraph::makeTIwithLSTMcell()); - checkSort(CNNNetSortTopologically(network)); - - checkSort(CNNNetSortTopologically(network)); -} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/inference_engine_regression_tests/common_dyn_batch_regression.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/inference_engine_regression_tests/common_dyn_batch_regression.cpp deleted file mode 100644 index 5514971502c..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/inference_engine_regression_tests/common_dyn_batch_regression.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "common_dyn_batch_regression.hpp" - -std::vector supportedDynBatchValues = { - { "CPU", 4, 3 }, - { "CPU", 4, 2 }, - { "CPU", 4, 1 }, - { "CPU", 8, 5 }, - { "CPU", 8, 4 }, - { 
"CPU", 8, 3 } -}; - -INSTANTIATE_TEST_CASE_P(FunctionalTest_smoke, TestNoRegressionDynBatchFP32, ValuesIn(supportedDynBatchValues), getTestCaseName); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/input_tests/parser_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/input_tests/parser_tests.cpp deleted file mode 100644 index 916fb9a3de8..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/input_tests/parser_tests.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "parser_tests.hpp" - -ir_test_params ir_test_cases[] = { - ir_test_params("CPU", "FP32", negative_conv_kernel_x_case), - ir_test_params("CPU", "FP32", negative_conv_kernel_y_case), - ir_test_params("CPU", "FP32", negative_conv_stride_x_case), - ir_test_params("CPU", "FP32", negative_conv_weights_case), - ir_test_params("CPU", "FP32", negative_conv_biases_case), - - ir_test_params("CPU", "FP32", negative_fc_out_size_case), - ir_test_params("CPU", "FP32", negative_fc_weights_case), - ir_test_params("CPU", "FP32", negative_fc_biases_case), - - ir_test_params("CPU", "FP32", negative_deconv_kernel_x_case), - ir_test_params("CPU", "FP32", negative_deconv_kernel_y_case), - ir_test_params("CPU", "FP32", negative_deconv_stride_x_case), - ir_test_params("CPU", "FP32", negative_deconv_weights_case), - ir_test_params("CPU", "FP32", negative_deconv_biases_case), - - ir_test_params("CPU", "FP32", negative_pool_kernel_x_case), - ir_test_params("CPU", "FP32", negative_pool_kernel_y_case), - ir_test_params("CPU", "FP32", negative_pool_stride_x_case), - ir_test_params("CPU", "FP32", incorrect_pool_type_case), - - ir_test_params("CPU", "FP32", negative_norm_local_size_case), - ir_test_params("CPU", "FP32", negative_norm_k_case) -}; - -INSTANTIATE_TEST_CASE_P(FunctionalTest_smoke, IncorrectIRTests, - ::testing::ValuesIn(ir_test_cases), - getTestName); - diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/cropResize_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/cropResize_tests.cpp deleted file mode 100644 index b13922560c4..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/cropResize_tests.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "cropResize_tests.hpp" - -#ifdef USE_OPENCV - -#define COMBINE_WITH_DEFAULT(_dims, _in_layouts, _color_formats) \ - Combine(Values(Precision::FP32), \ - Values(_dims), \ - Values(std::make_pair(Precision::FP32, 1e-2), std::make_pair(Precision::U8, 1)), \ - Values(_in_layouts), \ - Values(ResizeAlgorithm::RESIZE_BILINEAR, ResizeAlgorithm::RESIZE_AREA), \ - Values(_color_formats), \ - Values(ROI({0, 40, 50, 220, 220})), \ - Values(false, true)) - -// test resize-only for all dims (as before) -// test resize + color conversion for smaller number of dims (simple upscale/downscale scenarios only) -namespace smoke { -static auto params_resize_only = COMBINE_WITH_DEFAULT( - TESTED_DIMS(1), - MULTI_VALUE(NCHW, NHWC), - COLOR_FORMATS_RAW); - -static auto params_csc_3ch_and_resize = COMBINE_WITH_DEFAULT( - TESTED_DIMS_SMALL(1), - MULTI_VALUE(NCHW, NHWC), - COLOR_FORMATS_3CH); - -static auto params_csc_4ch_and_resize = COMBINE_WITH_DEFAULT( - TESTED_DIMS_SMALL(1), - NHWC, - 
COLOR_FORMATS_4CH); - -// batch preprocessing parameters: -static auto batch_params_resize_only = COMBINE_WITH_DEFAULT( - TESTED_DIMS(2), - MULTI_VALUE(NCHW, NHWC), - COLOR_FORMATS_RAW); - -static auto batch_params_csc_3ch_and_resize = COMBINE_WITH_DEFAULT( - TESTED_DIMS_SMALL(2), - MULTI_VALUE(NCHW, NHWC), - COLOR_FORMATS_3CH); - -static auto batch_params_csc_4ch_and_resize = COMBINE_WITH_DEFAULT( - TESTED_DIMS_SMALL(2), - NHWC, - COLOR_FORMATS_4CH); -} // namespace smoke - - -// test everything in nightly (as before) -namespace nightly { -static auto params_csc_3ch_and_resize = COMBINE_WITH_DEFAULT( - TESTED_DIMS(1), - MULTI_VALUE(NCHW, NHWC), - MULTI_VALUE(COLOR_FORMATS_RAW, COLOR_FORMATS_3CH)); - -static auto params_csc_4ch_and_resize = COMBINE_WITH_DEFAULT( - TESTED_DIMS(1), - NHWC, - COLOR_FORMATS_4CH); - -// batch preprocessing parameters: -static auto batch_params_csc_3ch_and_resize = COMBINE_WITH_DEFAULT( - MULTI_VALUE(TESTED_DIMS(2), TESTED_DIMS(3)), - MULTI_VALUE(NCHW, NHWC), - MULTI_VALUE(COLOR_FORMATS_RAW, COLOR_FORMATS_3CH)); - -static auto batch_params_csc_4ch_and_resize = COMBINE_WITH_DEFAULT( - MULTI_VALUE(TESTED_DIMS(2), TESTED_DIMS(3)), - NHWC, - COLOR_FORMATS_4CH); -} // namespace nightly - -// reorder preprocessing parameters: -static auto reorder_params = Combine( - Values(Precision::FP32), // network precision - Values(SizeVector({1, 3, 300, 300})), // sizes of the network - Values(std::make_pair(Precision::FP32, 1e-2), std::make_pair(Precision::U8, 1)), // precision and threshold - Values(std::make_pair(NCHW, NHWC), std::make_pair(NHWC, NCHW)), // Input/network data layout - Values(ResizeAlgorithm::NO_RESIZE), - Values(ColorFormat::BGR), - Values(ROI({0, 0, 0, 300, 300})), // cropped ROI params (id, x, y, width, height) - Values(false, true) // Infer mode sync/async -); - -// nv12 preprocessing parameters: -static auto nv12_params = Combine( - Values(Precision::FP32), // network precision - Values(cv::Size(300, 300)), // input image size - Values(TESTED_DIMS(1)), // sizes of the network - Values(std::make_pair(Precision::U8, 1)), // precision and threshold - Values(ResizeAlgorithm::RESIZE_BILINEAR, ResizeAlgorithm::RESIZE_AREA), - Values(ColorFormat::NV12), - Values(ROI({0, 0, 0, 300, 300}), ROI({0, 15, 10, 210, 210})), // cropped ROI params (id, x, y, width, height) - Values(false, true) // Infer mode sync/async -); - -static auto random_roi_3c = Combine( - Values(Precision::FP32), - Values(TESTED_DIMS(1)), - Values(std::make_pair(Precision::FP32, 1e-2), std::make_pair(Precision::U8, 1)), - Values(MULTI_VALUE(NCHW, NHWC)), - Values(ResizeAlgorithm::RESIZE_BILINEAR, ResizeAlgorithm::RESIZE_AREA), - Values(COLOR_FORMATS_3CH), - Values(ROI({0, 0, 0, 0, 0})), - Values(false, true) -); - -static auto random_roi_4c = Combine( - Values(Precision::FP32), - Values(TESTED_DIMS(1)), - Values(std::make_pair(Precision::FP32, 1e-2), std::make_pair(Precision::U8, 1)), - Values(NHWC), - Values(ResizeAlgorithm::RESIZE_BILINEAR, ResizeAlgorithm::RESIZE_AREA), - Values(COLOR_FORMATS_4CH), - Values(ROI({0, 0, 0, 0, 0})), - Values(false, true) -); - -static auto random_roi_nv12 = Combine( - Values(Precision::FP32), - Values(TESTED_DIMS(1)), - Values(std::make_pair(Precision::U8, 1)), - Values(NHWC), - Values(ResizeAlgorithm::RESIZE_BILINEAR, ResizeAlgorithm::RESIZE_AREA), - Values(ColorFormat::NV12), - Values(ROI({0, 0, 0, 0, 0})), - Values(false, true) -); -struct PreprocessRegression: public TestsCommon {}; - -TEST_F(PreprocessRegression, smoke_DifferentSizes) { - // Reproduce 
"object was compiled for different meta" problem. - // When G-API/Fluid is used as a preprocessing engine, - // its state wasn't updated internally if input dimensions changed. - // Thus while graph itself continued working properly on all dimensions, - // it wan't reshaped when it had to: - // * On first call (frame size = X), _lastCall is initialized with size X - // * On second call (frame size = Y), graph is reshaped to size Y but _lastCall is still X - // * On third call (frame size = X), graph is NOT reshaped since this X matches _lastCall, - // exception is thrown since a graph reshaped to input size Y is asked to process input size X. - - Blob::Ptr in_blob; - Blob::Ptr out_blob; - - std::vector in_sizes = { - cv::Size(256, 256), - cv::Size(72, 72), - cv::Size(256, 256), - }; - - SizeVector out_dims = {1, 3, 64, 64}; - out_blob = make_shared_blob(TensorDesc(Precision::U8, out_dims, Layout::NCHW)); - out_blob->allocate(); - - PreProcessInfo info; - info.setResizeAlgorithm(RESIZE_BILINEAR); - - PreProcessDataPtr preprocess = CreatePreprocDataHelper(); - for (auto sz : in_sizes) { - cv::Mat in_mat = cv::Mat::eye(sz, CV_8UC3)*255; - in_blob = img2Blob(in_mat, Layout::NHWC); - preprocess->setRoiBlob(in_blob); - EXPECT_NO_THROW(preprocess->execute(out_blob, info, false)); - } - - // Not thrown = test is green. -}; - -struct IEPreprocessTest : public TestsCommon {}; -TEST_F(IEPreprocessTest, smoke_NetworkInputSmallSize) { - const size_t num_threads = parallel_get_max_threads(); - - std::vector out_sizes = { - cv::Size(num_threads, num_threads - 1), - cv::Size(num_threads - 1, num_threads), - cv::Size(1, 1), - cv::Size(1, 0), - cv::Size(0, 1) - }; - - SizeVector in_dims = {1, 3, num_threads * 2, num_threads * 2}; - cv::Mat in_mat = cv::Mat::eye(cv::Size(in_dims[3], in_dims[2]), CV_8UC3)*255; - Blob::Ptr in_blob = img2Blob(in_mat, Layout::NHWC); - - PreProcessInfo info; - info.setResizeAlgorithm(RESIZE_BILINEAR); - - PreProcessDataPtr preprocess = CreatePreprocDataHelper(); - preprocess->setRoiBlob(in_blob); - - for (const auto& sz : out_sizes) { - SizeVector out_dims = {1, 3, static_cast(sz.height), static_cast(sz.width)}; - Blob::Ptr out_blob = make_shared_blob(TensorDesc(Precision::U8, out_dims, Layout::NHWC)); - out_blob->allocate(); - // FIXME: sz with 0 dims must be a separate test - if (sz.width > 0 && sz.height > 0) { - EXPECT_NO_THROW(preprocess->execute(out_blob, info, false)); - } else { - EXPECT_THROW(preprocess->execute(out_blob, info, false), - InferenceEngine::Exception); - } - } -} - -// smoke: -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_random_roi_3c_smoke, RandomROITest, random_roi_3c); -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_random_roi_4c_smoke, RandomROITest, random_roi_4c); -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_random_roi_nv12_smoke, RandomROITest, random_roi_nv12); - -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_resize_only_smoke, CropResizeTest, smoke::params_resize_only); -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_csc_3ch_and_resize_smoke, CropResizeTest, smoke::params_csc_3ch_and_resize); -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_csc_4ch_and_resize_smoke, CropResizeTest, smoke::params_csc_4ch_and_resize); - -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_resize_only_smoke, DynamicBatchResizeTest, smoke::batch_params_resize_only); -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_csc_3ch_and_resize_smoke, DynamicBatchResizeTest, smoke::batch_params_csc_3ch_and_resize); -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_csc_4ch_and_resize_smoke, DynamicBatchResizeTest, smoke::batch_params_csc_4ch_and_resize); - 
-PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_reorder_smoke, ReorderTest, reorder_params); - -PLUGING_CASE_WITH_SUFFIX(CPU, _gapi_csc_nv12_and_resize_smoke, NV12ColorConvertTest, nv12_params); - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// nightly: - -// FIXME: enable these once smoke/nightly concepts are introduced in CI -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_random_roi_3c_nightly, RandomROITest, random_roi_3c); -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_random_roi_4c_nightly, RandomROITest, random_roi_4c); -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_random_roi_nv12_nightly, RandomROITest, random_roi_nv12); - -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_csc_3ch_and_resize_nightly, CropResizeTest, nightly::params_csc_3ch_and_resize); -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_csc_4ch_and_resize_nightly, CropResizeTest, nightly::params_csc_4ch_and_resize); - -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_csc_3ch_and_resize_nightly, BatchResizeTest, nightly::batch_params_csc_3ch_and_resize); -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_csc_4ch_and_resize_nightly, BatchResizeTest, nightly::batch_params_csc_4ch_and_resize); - -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_csc_3ch_and_resize_nightly, DynamicBatchResizeTest, nightly::batch_params_csc_3ch_and_resize); -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_csc_4ch_and_resize_nightly, DynamicBatchResizeTest, nightly::batch_params_csc_4ch_and_resize); - -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_reorder_nightly, ReorderTest, reorder_params); - -PLUGING_CASE_WITH_SUFFIX(DISABLED_CPU, _gapi_csc_nv12_and_resize_nightly, NV12ColorConvertTest, nv12_params); - -#endif // USE_OPENCV diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/dims_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/dims_tests.cpp deleted file mode 100644 index 963b6676c2d..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/dims_tests.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "dims_tests.hpp" - -PLUGING_CASE_WITH_SUFFIX(CPU, _smoke, IO_BlobTest, params); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/layout_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/layout_tests.cpp deleted file mode 100644 index 084f69911ed..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/io_blob_tests/layout_tests.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "layout_tests.hpp" - -static auto params = ::testing::Combine( - ::testing::Values(conv_p), - ::testing::Values(std::make_pair(Precision::FP32, 1e-5)), - ::testing::Values(NCHW, NHWC), - ::testing::Values(NCHW, NHWC), - ::testing::Values(Precision::FP32, Precision::U8) // TODO: What about U16/I8/FP16? 
-); - -PLUGING_CASE_WITH_SUFFIX(CPU, _smoke, LayoutTTTest, params); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/lstm_cell_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/lstm_cell_test.cpp deleted file mode 100644 index 622259f4f22..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/lstm_cell_test.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "lstm_cell_test.hpp" - -RUN_CASE_P_WITH_SUFFIX(CPU, _smoke, LSTMCellTest, workload); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/lstm_ir_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/lstm_ir_test.cpp deleted file mode 100644 index a4c00554258..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/lstm_ir_test.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "lstm_ir_test.hpp" - -RUN_CASE_P_WITH_SUFFIX(CPU, _smoke, LSTM_IR_Test, workload); - -static std::vector hetero_workload { workload }; -RUN_CASE_P_WITH_SUFFIX(HETERO_CPU, _smoke, LSTM_IR_Test, hetero_workload); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/rnn_seq_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/rnn_seq_test.cpp deleted file mode 100644 index 882ecdd955d..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/lstm/rnn_seq_test.cpp +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "rnn_seq_test.hpp" - -RUN_CASE_CP_WITH_SUFFIX(CPU, _smoke, RNNSeqTest, workload); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/network_tests/network_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/network_tests/network_test.cpp deleted file mode 100644 index cae97a1177f..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/network_tests/network_test.cpp +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include -#include "ie_precision.hpp" -#include -#include -#include "low_precision_transformations/transformer.hpp" -#include "common/validation.hpp" -#include - -#include "network_i8.hpp" - -/************************************************* - * !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! - * All ref values was obtained from Caffe scoring - * !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! 
- *************************************************/ - -TEST_P(ModelTransformationsTest, LPT) {} - -static void checkLayerInputPrecision(const CNNNetwork& network, const std::string& layerName, Precision expectedPrecision, int inputIndex = -1) { - CNNLayerPtr layer = getLayer(network, layerName); - if (layer == nullptr) { - IE_THROW() << "layer '" << layerName << "' was not found"; - } - for (size_t index = 0ul; index < layer->insData.size(); ++index) { - if ((inputIndex != -1) && (index != inputIndex)) { - continue; - } - - const DataWeakPtr weakData = layer->insData[index]; - ASSERT_EQ(expectedPrecision, weakData.lock()->getPrecision()) << " unexpected precision " << weakData.lock()->getPrecision() << " for layer " << layerName; - } -} - -ModelParams getModelParams(const std::string modelName) { -std::map modelParams = { - // { - // "inception_v2_tf", - // ModelParams( - // "inception_v2_tf", - // "inception_v2/inception_v2_i8.xml", - // "validation_set/224x224/dog.bmp", - // {{157, 9.49783 }, // 157 row: 'Blenheim spaniel' - // { 219, 7.13866 }, // 219 row: 'Welsh springer spaniel', - // { 216, 5.60607 }, // 153 row: 'Japanese spaniel', - // { 220, 5.23158 }} - // ) - // }, - - { - "inception_v3_tf", - ModelParams( - "inception_v3_tf", - "inception_v3/inception_v3_i8.xml", - "validation_set/299x299/dog.bmp", - {{157, 10.1683}, // 157 row: 'Blenheim spaniel' - { 219, 5.751 }, // 219 row: 'Welsh springer spaniel', - { 153, 4.9502 }, // 153 row: 'Japanese spaniel', - { 216, 4.79769 }} - ) - }, - { - "mobilenet_v2_tf_depthwise", - ModelParams( - "mobilenet_v2_tf_depthwise", - "mobilenet_v2_1.4_224/mobilenet_v2_1.4_224_i8.xml", - "validation_set/224x224/dog.bmp", - // original (FP32, no LPT) output tensor - {{ 157, 8.63748 }, - { 219, 6.29954 }, - { 216, 4.7303 }, // Windows, Linux: {218, 4.75413} - { 218, 4.69319 }, // Windows, Linux: {216, 4.75355} - { 220, 3.67249 }}, - {}, - [](const TransformationsParams& transformationsParam, CNNNetworkImplPtr usedNetwork) { - if (transformationsParam.transformationsInTestEnabled && transformationsParam.params.updatePrecisions) { - const static std::vector> fakeQuantizeAndConcolutionItems = { - // U8 with shift on activations - {"MobilenetV2/Conv/Conv2D/fq_input_0", ""}, - {"MobilenetV2/expanded_conv/project/Conv2D/fq_input_0", "MobilenetV2/expanded_conv/project/BatchNorm/FusedBatchNormV3/variance/Fused_Add_"}, - // I8 on activations - {"MobilenetV2/expanded_conv_1/expand/Conv2D/fq_input_0", ""}, - {"MobilenetV2/expanded_conv_1/project/Conv2D/fq_input_0", "MobilenetV2/expanded_conv_1/project/BatchNorm/FusedBatchNormV3/variance/Fused_Add_"}, - // I8 on activations - {"MobilenetV2/expanded_conv_2/add/fq_input_1", ""}, - {"MobilenetV2/expanded_conv_2/project/Conv2D/fq_input_0", "MobilenetV2/expanded_conv_2/project/BatchNorm/FusedBatchNormV3/variance/Fused_Add_"}, - // I8 on activations - {"MobilenetV2/expanded_conv_3/expand/Conv2D/fq_input_0", ""} - }; - - for (const std::pair item : fakeQuantizeAndConcolutionItems) { - TestsCommonFunc::checkLayerOuputPrecision(usedNetwork, item.first, Precision::U8); - if (!item.second.empty()) { - checkLayerInputPrecision(usedNetwork, item.second, Precision::U8, 0); - } - } - } - }) - }, - { - "resnet_50_tf", - ModelParams( - "resnet_50_tf", - "resnet_v1_50/resnet_v1_50_i8.xml", - "validation_set/224x224/dog.bmp", - {{ 156, 16.1796 }, - { 218, 11.9186 }, - { 219, 10.8054 }, - { 217, 10.1224 }, - { 152, 9.60148 }}, - {}, - [](const TransformationsParams& transformationsParam, CNNNetwork usedNetwork) { - if 
(transformationsParam.transformationsInTestEnabled && transformationsParam.params.updatePrecisions) { - const Precision originalPrecision = Precision::FP32; - const Precision targetPrecision = Precision::U8; - - //Eltwise CPU/GPU specific - TestsCommonFunc::checkLayerOuputPrecision(usedNetwork, "resnet_v1_50/block1/unit_1/bottleneck_v1/add/fq_input_0", originalPrecision); - TestsCommonFunc::checkLayerOuputPrecision(usedNetwork, "resnet_v1_50/block1/unit_1/bottleneck_v1/add/fq_input_1", Precision::I8); - - TestsCommonFunc::checkLayerOuputPrecision(usedNetwork, "resnet_v1_50/block2/unit_1/bottleneck_v1/add/fq_input_0", originalPrecision); - TestsCommonFunc::checkLayerOuputPrecision(usedNetwork, "resnet_v1_50/block2/unit_1/bottleneck_v1/add/fq_input_1", Precision::I8); - } - }) - }, - }; - - const auto it = modelParams.find(modelName); - if (it == modelParams.end()) { - IE_THROW() << "parameters for model '" << modelName << "' were not found"; - } - return it->second; -} - -//0.005f, -INSTANTIATE_TEST_CASE_P( - smoke_Inception, - ModelTransformationsTest, - ::testing::Values( - // TransformationsParams("CPU", getModelParams("inception_v2_tf"), 1ul, true, false, createParam()), - - TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, false, createParam(), {}, 3ul), - TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamI8I8(), {}, 0, false), - TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8I8(), {}, 0), - TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8U8(), {}, 0), - TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateLevel)), - TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateIntervals)), - TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, true, false, createParam()), - TransformationsParams("CPU", getModelParams("inception_v3_tf"), 2ul, true, false, createParam()) - ), - TransformationsParams::getLowPrecisionTransformerSingleLayerTestName); - -INSTANTIATE_TEST_CASE_P( - smoke_MobileNet, - ModelTransformationsTest, - ::testing::Values( - TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, false, createParamU8I8(), {}, 2), -// TODO: eshoguli: fix this issue -// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamI8I8()), -// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8I8()), -// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8U8(), {}, 2), -// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamCpu(), { "464/Pool", "465/Pool" }), - TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, true, false, createParamU8I8(), {}, 2), - TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 2ul, true, false, createParamU8I8(), {}, 2) - ), - TransformationsParams::getLowPrecisionTransformerSingleLayerTestName); - -INSTANTIATE_TEST_CASE_P( - smoke_ResNet, - ModelTransformationsTest, - ::testing::Values( - TransformationsParams("CPU", 
getModelParams("resnet_50_tf"), 1ul, false), - TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamI8I8(), { - // TODO: remove when eltwise validation was added - "resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars", - "resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars" - }), - TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8I8(), { -// // TODO: remove when eltwise validation was added - "resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars", - "resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars" - }), - TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8U8(), { - // TODO: remove when eltwise validation was added - "resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars", - "resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars" - }), - TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamCpu(), { - // TODO: remove when eltwise validation was added - "resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars", - "resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars" - }), - TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, true), - TransformationsParams("CPU", getModelParams("resnet_50_tf"), 2ul, true) - ), - TransformationsParams::getLowPrecisionTransformerSingleLayerTestName); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/transformations/low_precision_transformer_single_layer_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/transformations/low_precision_transformer_single_layer_tests.cpp deleted file mode 100644 index 090fc1bf99d..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/transformations/low_precision_transformer_single_layer_tests.cpp +++ /dev/null @@ -1,862 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "low_precision_transformer_single_layer_tests.hpp" -#include -#include -#include - -using namespace ::testing; -using namespace InferenceEngine; - - -TEST_P(SingleLayerTransformationsTest, LPT) { -} - -INSTANTIATE_TEST_CASE_P( - smoke_SingleLayerTransformationsTestFP32, - SingleLayerTransformationsTest, - ::testing::Values( - SingleLayerTransformationsTestParams( - "CPU", - PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, 0)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - PowerTestModel::Ptr(new PowerTestModel(1.f, 2.89f, 64)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - PowerTestModel::Ptr(new PowerTestModel(1.f, -32.f, 0)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, -64.f)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - PowerTestModel::Ptr(new PowerTestModel(3.5f, 1.f, 0)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ResampleTestModel()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new 
FullyConnectedAndScaleShiftsOnActivationsTestModel()), - { { 1, 2048 } }, - { { 1, 1000 } }), - - // TODO: uncomment later - //SingleLayerTransformationsTestParams( - // "MKLDNNPlugin", - // SingleLayerTestModel::Ptr(new FullyConnectedTestModel({ 1, 128, 12, 64 }, { 128, 768 })), - // { { 1, 128, 12, 64 } }, - // { { 128, 768 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FullyConnectedTestModel({ 1, 128, 12, 64 }, { 1, 128, 768 })), - { { 1, 128, 12, 64 } }, - { { 1, 128, 768 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsNegativeTestModel()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndInvertedWeightsTestModel()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeReshapePoolingTestModelWithConstants()), - { { 1, 1280, 7 } }, - { { 1, 1280, 7 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeReshapePoolingTestModelWithoutConstants()), - { { 1, 1280, 7 } }, - { { 1, 1280, 7 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeReshapeTestModelWithConstants()), - { { 1, 256, 6, 6 } }, - { { 1, 9216 } }), - - // TODO: fix asymmetric patern creation issue for NC layout and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new FullyConnectedAndQuantizeTestModel()), - // { { 1, 32, 1, 1 } }, - // { { 1, 32, 1, 1 } }), - - // TODO: uncomment when biases correction with absent biases will be fixed - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new GemmAndQuantizeTestModel()), - // { { 1, 32, 149, 149 } }, - // { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new PoolingTestModel()), - { { 149, 149, 32, 1 } }, - { { 149, 149, 32, 1 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel()), - { { 1, 32, 147, 147 } }, - { { 1, 64, 147, 147 } }), - - // Const transformation is disabled - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndPoolingAndQuantizeOnActivationsTestModel()), - { { 1, 64, 147, 147 } }, - { { 1, 80, 73, 73 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnActivationsTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 32, 149, 149 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new 
ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 32, 149, 149 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 32, 149, 149 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionDepthwiseTestModel()), - { { 1, 32, 112, 112 } }, - { { 1, 32, 112, 112 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionGroupedTestModel()), - { { 1, 32, 112, 112 } }, - { { 1, 32, 112, 112 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatMultiChannelTestModel()), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConcatMultiBranchTestModel()), - // { { 299, 299, 3, 1 }, { 299, 299, 3, 1 } }, - // { { 299, 299, 12, 1 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new QuantizationOnWeightsTestModel()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - // TODO: fix later - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new QuantizationOnInvertedWeightsTestModel()), - // { { 1, 32, 149, 149 } }, - // { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeAsOutputTest()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeWithMultiOutputsTest()), - { { 1, 32, 149, 149 } }, - { { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeAndScaleShiftTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationTestModel({ {-10.25, 10.1641} })), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationTestModel({ {-0.00174255, 0.00174255} })), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationTestModel({ {-329.688, 327.188} })), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationWithNegativeScalesTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeAndActivationWithNegativeSlopeTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ScaleShiftAndFakeQuantizeTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeWithTwoScaleShiftsAsOutput()), - { { 1, 32, 28, 28 }, { } }, - { { } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new MvnTestModel(0ul, 0ul)), - { { 1, 4, 128, 128, 128 } }, - { { 1, 4, 128, 128, 128 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new 
MvnTestModel(1ul, 0ul)), - { { 1, 4, 128, 128, 128 } }, - { { 1, 4, 128, 128, 128 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new MvnTestModel(0ul, 1ul)), - { { 1, 4, 128, 128, 128 } }, - { { 1, 4, 128, 128, 128 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new MvnTestModel(1ul, 1ul)), - { { 1, 4, 128, 128, 128 } }, - { { 1, 4, 128, 128, 128 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new PrecisionSelectionMultibranchPreservedTestModel(true)), - { { 1, 32, 149, 149 } }, - { { 1, 32, 149, 149 }, { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new PrecisionSelectionMultibranchPreservedTestModel(false)), - { { 1, 32, 149, 149 } }, - { { 1, 32, 149, 149 }, { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new PrecisionSelectionMultibranchNotPreservedTestModel(true)), - { { 1, 32, 149, 149 } }, - { { 1, 32, 149, 149 }, { 1, 32, 147, 147 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new PrecisionSelectionMultibranchNotPreservedTestModel(false)), - { { 1, 32, 149, 149 } }, - { { 1, 32, 149, 149 }, { 1, 32, 147, 147 } }) - ), - SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName); - -INSTANTIATE_TEST_CASE_P( - smoke_EltwiseTestFP32, - SingleLayerTransformationsTest, - ::testing::Values( - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseTestModel(true, "sum", true)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseTestModel(true, "sum", false)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseTestModel(true, "mul", true)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseTestModel(true, "mul", false)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseFqWithChildrenTestModel(true, "sum", true)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseFqWithChildrenTestModel(true, "sum", false)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseFqWithChildrenTestModel(true, "mul", true)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseFqWithChildrenTestModel(true, "mul", false)), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }) - ), - SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName); - -INSTANTIATE_TEST_CASE_P( - smoke_ConcatTestFP32, - SingleLayerTransformationsTest, - ::testing::Values( - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(true, true, true)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(true, true, false)), - { { 1, 3, 299, 
299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(true, false)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(false, true)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConcatTestModel(false, false)), - // { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - // { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(true, true, true, { 100, 1 })), - { { 100, 1 }, { 100, 1 } }, - { { 100, 2 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(true, true, false, { 100, 1 })), - { { 100, 1 }, { 100, 1 } }, - { { 100, 2 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(false, true, true, { 100, 1 })), - { { 100, 1 }, { 100, 1 } }, - { { 100, 2 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(false, true, false, { 100, 1 })), - { { 100, 1 }, { 100, 1 } }, - { { 100, 2 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(false, false, false, 2.0)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(false, true, false, 2.0)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(true, false, false, 2.0)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(true, true, false, 2.0)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(false, false, true, 2.0)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(false, true, true, 2.0)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(true, false, true, 2.0)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(true, true, true, 2.0)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }) - ), - SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName); - -INSTANTIATE_TEST_CASE_P( - smoke_ScaleShiftToConvolutionFP32, - SingleLayerTransformationsTest, - ::testing::Values( - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ScaleShiftToConvolutionAfterNotConcatIgnoreTestModel()), - { { 1, 64, 112, 112 } }, - { { 1, 64, 112, 112 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new 
ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel()), - { { 1, 64, 112, 112 } }, - { { 1, 64, 112, 112 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ScaleShiftToConvolutionAfterConcatTestModel(true)), - { { 1, 32, 299, 299 }, { 1, 32, 299, 299 } }, - { { 1, 64, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ScaleShiftToConvolutionAfterConcatTestModel(false)), - { { 1, 32, 299, 299 }, { 1, 32, 299, 299 } }, - { { 1, 64, 299, 299 } }) - ), - SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName); - -INSTANTIATE_TEST_CASE_P( - smoke_UpdateBiases, - SingleLayerTransformationsTest, - ::testing::Values( - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new UpdateBiasesConvolutionTestModel(false)), - { { 1, 32, 112, 112 } }, - { { 1, 32, 112, 112 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new UpdateBiasesConvolutionTestModel(true)), - { { 1, 32, 112, 112 } }, - { { 1, 32, 112, 112 } }) - - // TODO: uncomment later - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(false)), - // { { 1, 32, 112, 112 } }, - // { { 1, 100 } }), - - // TODO: uncomment later - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(true)), - // { { 1, 32, 112, 112 } }, - // { { 1, 100 } }) - ), - SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName); - -INSTANTIATE_TEST_CASE_P( - smoke_EltwiseCpuWithPooling, - SingleLayerTransformationsTest, - ::testing::Values( - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseWithPoolingTestModel(true, "mul", false)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseWithPoolingTestModel(true, "mul", true)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseWithPoolingTestModel(true, "sum", false)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseWithPoolingTestModel(true, "sum", true)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }) - ), - SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName); - -INSTANTIATE_TEST_CASE_P( - smoke_Eltwise, - SingleLayerTransformationsTest, - ::testing::Values( - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseWithPoolingTestModel(true, "sum", false)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseWithPoolingTestModel(true, "sum", true)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseCpuTestModel()), - { { 1, 3, 299, 299 } }, - { {} }), - -// SingleLayerTransformationsTestParams( -// "CPU", -// SingleLayerTestModel::Ptr(new EltwiseTestModel()), -// { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, -// { {} }, -// "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - 
SingleLayerTestModel::Ptr(new EltwiseCpuTestModel()), - { { 1, 3, 299, 299 } }, - { {} }, - "FP16"), - -// SingleLayerTransformationsTestParams( -// "CPU", -// SingleLayerTestModel::Ptr(new EltwiseBroadcastTestModel()), -// { { 1, 128, 128 }, { 1, 128, 128 } }, -// { { 1, 128, 128 } }), - - SingleLayerTransformationsTestParams( // 5 - "CPU", - SingleLayerTestModel::Ptr(new EltwiseBroadcastTestModel()), - { { 1, 1, 128 }, { 1, 128, 128 } }, - { { 1, 128, 128 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseBroadcastTestModel()), - { { 1, 128, 128 }, { 1, 128, 1 } }, - { { 1, 128, 128 } }), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new EltwiseBroadcastTestModel()), - { { 1, 1, 128 }, { 1, 128, 1 } }, - { { 1, 128, 128 } }))); - -INSTANTIATE_TEST_CASE_P( - smoke_SingleLayerTransformationsTestFP16, - SingleLayerTransformationsTest, - ::testing::Values( - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FullyConnectedAndScaleShiftsOnActivationsTestModel()), - { { 1, 2048 } }, - { { 1, 1000 } }, - "FP16"), - - // TODO: uncomment after fix - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsTestModel()), - // { { 1, 32, 149, 149 } }, - // { { 1, 32, 147, 147 } }, - // "FP16"), - - // TODO: uncomment after fix - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnUnsignedActivationsAndWeightsTestModel()), - // { { 1, 32, 149, 149 } }, - // { { 1, 32, 147, 147 } }, - // "FP16"), - - // TODO: uncomment after fix -// SingleLayerTransformationsTestParams( -// "CPU", -// SingleLayerTestModel::Ptr(new FakeQuantizeReshapePoolingTestModelWithConstants()), -// { { 1, 1280, 7 } }, -// { { 1, 1280, 7 } }, -// "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeReshapePoolingTestModelWithoutConstants()), - { { 1, 1280, 7 } }, - { { 1, 1280, 7 } }, - "FP16"), - - // TODO: uncomment after fix - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new FakeQuantizeReshapeTestModelWithConstants()), - // { { 1, 256, 6, 6 } }, - // { { 1, 9216 } }, - // "FP16"), - - //Not parametrized yet. 
Executed on FP32 - - // TODO: fix asymmetric patern creation issue for NC layout and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new FullyConnectedAndQuantizeTestModel()), - // { { 1, 32, 149, 149 } }, - // { { 1, 32, 147, 147 } }, - // "FP16"), - - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new GemmAndQuantizeTestModel()), - // { { 1, 32, 149, 149 } }, - // { { 1, 32, 147, 147 } }, - // "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new PoolingTestModel()), - { { 149, 149, 32, 1 } }, - { { 149, 149, 32, 1 } }, - "FP16"), - - // TODO: failed on I8 on activations - uncomment after fix - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel()), - // { { 1, 32, 147, 147 } }, - // { { 1, 64, 147, 147 } }, - // "FP16"), - - // TODO: uncomment after fix - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnWeightsWithoutConstTransformationTestModel()), - // { { 1, 32, 149, 149 } }, - // { { 1, 32, 147, 147 } }, - // "FP16"), - - // TODO: uncomment after fix - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConvolutionAndPoolingAndQuantizeOnActivationsTestModel()), - // { { 1, 64, 147, 147 } }, - // { { 1, 80, 73, 73 } }, - // "FP16"), - - // TODO: uncomment after fix - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnActivationsTestModel()), - // { { 1, 3, 299, 299 } }, - // { { 1, 32, 149, 149 } }, - // "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndDequantizationScaleShiftsOnActivationsTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 32, 149, 149 } }, - "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConvolutionAndDequantizationScaleShiftAndQuantizeOnActivationsTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 32, 149, 149 } }, - "FP16"), - - // TODO: fix and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConvolutionDepthwiseTestModel()), - // { { 1, 32, 112, 112 } }, - // { { 1, 32, 112, 112 } }, - // "FP16"), - - // TODO: fix and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConvolutionGroupedTestModel()), - // { { 1, 32, 112, 112 } }, - // { { 1, 32, 112, 112 } }, - // "FP16"), - - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConcatTestModel(true)), - // { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - // { { 1, 6, 299, 299 } }, - // "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatTestModel(false, true)), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 } }, - "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new ConcatMultiChannelTestModel()), - { { 1, 3, 299, 299 }, { 1, 3, 299, 299 } }, - { { 1, 6, 299, 299 }, }, - "FP16"), - - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ConcatMultiBranchTestModel()), - // { { 299, 299, 3, 1 }, { 299, 299, 3, 1 } }, - // { { 299, 299, 12, 1 } }, - // "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new QuantizationOnWeightsTestModel()), - { { 1, 
32, 149, 149 } }, - { { 1, 32, 147, 147 } }, - "FP16"), - - // TODO: fix later - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new QuantizationOnInvertedWeightsTestModel()), - // { { 1, 32, 149, 149 } }, - // { { 1, 32, 147, 147 } }, - // "FP16"), - - SingleLayerTransformationsTestParams( - "CPU", - SingleLayerTestModel::Ptr(new FakeQuantizeAndScaleShiftTestModel()), - { { 1, 3, 299, 299 } }, - { { 1, 3, 299, 299 } }, - "FP16") - - // TODO: fix and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ScaleShiftToConvolutionAfterNotConcatIgnoreTestModel()), - // { { 1, 64, 112, 112 } }, - // { { 1, 64, 112, 112 } }, - // "FP16") - - // TODO: fix and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ScaleShiftToConvolutionAfterFakeQuantizeIgnoreTestModel()), - // { { 1, 64, 112, 112 } }, - // { { 1, 64, 112, 112 } }, - // "FP16") - - // TODO: fix and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new ScaleShiftToConvolutionAfterConcatTestModel()), - // { { 1, 32, 299, 299 }, { 1, 32, 299, 299 } }, - // { { 1, 64, 299, 299 } }, - // "FP16") - - // TODO: fix and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new UpdateBiasesConvolutionTestModel(false)), - // { { 1, 32, 112, 112 } }, - // { { 1, 32, 112, 112 } }, - // "FP16"), - - // TODO: fix and uncomment - //SingleLayerTransformationsTestParams( - // "CPU", - // SingleLayerTestModel::Ptr(new UpdateBiasesConvolutionTestModel(true)), - // { { 1, 32, 112, 112 } }, - // { { 1, 32, 112, 112 } }, - // "FP16") - ), - SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/argmax_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/argmax_tests.cpp deleted file mode 100644 index 66e5b21c39e..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/argmax_tests.cpp +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include "common_test_utils/data_utils.hpp" - -using namespace ::testing; -using namespace InferenceEngine; - -struct argmax_test_params { - std::vector src_dims; - std::vector dst_dims; - int has_axis; - int axis; - int out_max_val; - int top_k; -}; - -static inline int count(std::vector dims, size_t start_ind, size_t end_ind) { - size_t count = 1; - for (size_t i = start_ind; i < end_ind; i++) - count *= dims[i]; - return static_cast(count); -} - -static inline int count(std::vector dims, size_t start_ind = 0) { - return count(dims, start_ind, dims.size()); -} - -static void ref_argmax(InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst, argmax_test_params p) { - float *src_data = src.data(); - float* dst_data = dst.data(); - - int dim, axis_dist; - if (p.has_axis) { - int axis_ = (p.axis < 0) ? 
p.axis + static_cast(p.src_dims.size()) : p.axis; - dim = static_cast(p.src_dims[axis_]); - axis_dist = count(p.src_dims, axis_) / dim; - } else { - dim = count(p.src_dims, 1); - axis_dist = 1; - } - - int num = count(p.src_dims) / dim; - std::vector > src_vector(dim); - - for (int i = 0; i < num; ++i) { - for (int j = 0; j < dim; ++j) { - src_vector[j] = std::make_pair( - src_data[(i / axis_dist * dim + j) * axis_dist + i % axis_dist], j); - } - - std::partial_sort(src_vector.begin(), src_vector.begin() + p.top_k, - src_vector.end(), std::greater >()); - - for (int j = 0; j < p.top_k; ++j) { - if (p.out_max_val) { - if (p.has_axis) { - // Produces max_val per axis - dst_data[(i / axis_dist * p.top_k + j) * axis_dist + i % axis_dist] = src_vector[j].first; - } else { - // Produces max_ind and max_val - dst_data[2 * i * p.top_k + j] = src_vector[j].second; - dst_data[2 * i * p.top_k + p.top_k + j] = src_vector[j].first; - } - } else { - // Produces max_ind per axis - dst_data[(i / axis_dist * p.top_k + j) * axis_dist + i % axis_dist] = src_vector[j].second; - } - } - } -} - -class smoke_CPU_ArgmaxOnlyTest: public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - __SRC_DIMS__ - - - - - - - __SRC_DIMS__ - - - - __DST_DIMS__ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(argmax_test_params p) { - std::string model = model_t; - - std::string src_dims; - for (auto &dim : p.src_dims) { - src_dims += "\n "; - src_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims); - - std::string dst_dims; - for (auto &dim : p.dst_dims) { - dst_dims += "\n "; - dst_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__DST_DIMS__", dst_dims); - - std::string axis; - if (p.has_axis) { - axis += "axis=\"" + std::to_string(p.axis) + "\""; - } - REPLACE_WITH_STR(model, "_AXIS_", axis); - - REPLACE_WITH_STR(model, "__OUT_MAX_VAL__", std::to_string(p.out_max_val)); - REPLACE_WITH_STR(model, "__TOP_K__", std::to_string(p.top_k)); - - return model; - } - - virtual void SetUp() { - try { - argmax_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork net = ie.ReadNetwork(model, Blob::CPtr()); - - Blob::Ptr src = make_shared_blob({Precision::FP32, p.src_dims, Layout::ANY}); - src->allocate(); - - TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - CommonTestUtils::fill_data_sine(src->buffer(), src->size(), 0.5, 0.5, 1); - - BlobMap srcs; - srcs.insert(std::pair("input", src)); - - OutputsDataMap out; - out = net.getOutputsInfo(); - BlobMap outputBlobs; - - std::pair item = *out.begin(); - - TBlob::Ptr output; - output = make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_argmax(*srcPtr, dst_ref, p); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(net, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - inferRequest.SetInput(srcs); - inferRequest.SetOutput(outputBlobs); - inferRequest.Infer(); - - compare(*outputBlobs.begin()->second, dst_ref); - - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPU_ArgmaxOnlyTest, TestsArgmax) {} - -INSTANTIATE_TEST_CASE_P( - TestsArgmax, smoke_CPU_ArgmaxOnlyTest, - ::testing::Values( - argmax_test_params{{1, 3, 1024, 2048}, {1, 1, 1024, 2048}, 1, 1, 0, 1}, - argmax_test_params{{1, 
5, 1024, 2048}, {1, 1, 1024, 2048}, 1, 1, 1, 1}, - argmax_test_params{{3, 1, 10, 512}, {3}, 0, 1, 0, 1}, - argmax_test_params{{3, 1, 10, 512}, {3, 2}, 0, 1, 1, 1}, - argmax_test_params{{1, 20, 128, 128}, {1, 3, 128, 128}, 1, 1, 0, 3}, - argmax_test_params{{1, 20, 128, 128}, {1, 3, 128, 128}, 1, 1, 1, 3}, - argmax_test_params{{3, 1, 10, 512}, {3, 5}, 0, 1, 0, 5}, - argmax_test_params{{3, 1, 10, 512}, {3, 5, 2}, 0, 1, 1, 5}, - argmax_test_params{{1, 20, 128, 128}, {1, 18, 128, 128}, 1, 1, 0, 18}, - argmax_test_params{{1, 20, 128, 128}, {1, 18, 128, 128}, 1, 1, 1, 18} - )); - -INSTANTIATE_TEST_CASE_P( - TestsArgmaxOddDims, smoke_CPU_ArgmaxOnlyTest, - ::testing::Values( - argmax_test_params{{1, 3, 1025, 2049}, {1, 1, 1025, 2049}, 1, 1, 0, 1}, - argmax_test_params{{1, 5, 1025, 2049}, {1, 1, 1025, 2049}, 1, 1, 1, 1}, - argmax_test_params{{1, 20, 129, 129}, {1, 3, 129, 129}, 1, 1, 0, 3}, - argmax_test_params{{1, 20, 129, 129}, {1, 3, 129, 129}, 1, 1, 1, 3} - )); \ No newline at end of file diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/concat_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/concat_tests.cpp deleted file mode 100644 index 55e4c18b1c3..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/concat_tests.cpp +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" - -using namespace ::testing; -using namespace InferenceEngine; - - -struct concat_base_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in1; - - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in2; - - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } out; - - size_t axis; -}; - -struct concat_test_params : concat_base_params { - std::string device_name; - - concat_test_params(std::string name, concat_base_params params) - : concat_base_params(params), device_name(name) {} -}; - -template -void check_concat_fwd(const TBlob &src, concat_test_params prm) -{ -} - -class smoke_CPU_ConcatOnlyTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - _IN1_ - _IC1_ - _IH1_ - _IW1_ - - - - - - - _IN2_ - _IC2_ - _IH2_ - _IW2_ - - - - - - - - _IN1_ - _IC1_ - _IH1_ - _IW1_ - - - _IN2_ - _IC2_ - _IH2_ - _IW2_ - - - - - _ON_ - _OC_ - _OH_ - _OW_ - - - - - - - - - -)V0G0N"; - - std::string getModel(concat_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IN1_", p.in1.n); - REPLACE_WITH_NUM(model, "_IC1_", p.in1.c); - REPLACE_WITH_NUM(model, "_IW1_", p.in1.w); - REPLACE_WITH_NUM(model, "_IH1_", p.in1.h); - - REPLACE_WITH_NUM(model, "_IN2_", p.in2.n); - REPLACE_WITH_NUM(model, "_IC2_", p.in2.c); - REPLACE_WITH_NUM(model, "_IW2_", p.in2.w); - REPLACE_WITH_NUM(model, "_IH2_", p.in2.h); - - REPLACE_WITH_NUM(model, "_ON_", p.out.n); - REPLACE_WITH_NUM(model, "_OC_", p.out.c); - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - return model; - } - -protected: - - static void fill_data_ints(float *data, size_t size, int start) { - for (size_t i = 0; i < size; i++) { - data[i] = (float) (start + i); - } - } - - virtual void SetUp() { - - try { - concat_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network = 
ie.ReadNetwork(model, Blob::CPtr()); - - SizeVector dims_src1 = {p.in1.n, - p.in1.c, - p.in1.h, - p.in1.w - }; - - SizeVector dims_src2 = {p.in2.n, - p.in2.c, - p.in2.h, - p.in2.w}; - - SizeVector dims_dst = {p.out.n, - p.out.c, - p.out.h, - p.out.w}; - - Blob::Ptr src1 = make_shared_blob({Precision::FP32, dims_src1, Layout::NCHW}); - src1->allocate(); - fill_data_ints(src1->buffer(), src1->size(), 0); - Blob::Ptr src2 = make_shared_blob({Precision::FP32, dims_src2, Layout::NCHW}); - src2->allocate(); - fill_data_ints(src2->buffer(), src2->size(), 10000); - BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - - OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.device_name); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - inferRequest.SetInput(srcs); - inferRequest.SetOutput(outputBlobs); - inferRequest.Infer(); - - //compare(src, dst); - - float *src1_ptr = src1->buffer(); - float *src2_ptr = src2->buffer(); - float *dst_ptr = output->buffer(); - - int len1 = 1, len2 = 1, cycles; - for (int dim = p.axis; dim < output->getTensorDesc().getDims().size(); dim++) { - len1 *= src1->getTensorDesc().getDims()[dim]; - len2 *= src2->getTensorDesc().getDims()[dim]; - } - cycles = p.axis; - - - int index1 = 0, index2 = 0, index = 0; - for (int cycle = 0; cycle < cycles; cycle ++) { - for (int i1 = 0; i1 < len1; i1++) { - if (src1_ptr[index1] != dst_ptr[index]) - { - FAIL() << "index: " << index << " src: " << src1_ptr[index1] << ", dst: " << dst_ptr[index]; - } - index1++; index++; - } - for (int i2 = 0; i2 < len2; i2++) { - if (src2_ptr[index2] != dst_ptr[index]) - { - FAIL() << "index: " << index << " src: " << src2_ptr[index2] << ", dst: " << dst_ptr[index]; - } - index2++; index++; - } - } - - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -#define case_1 concat_base_params({\ - {1, 7, 2, 5},\ - {1, 7, 2, 5},\ - {2, 7, 2, 5},\ - 0}) -#define case_2 concat_base_params({\ - {1, 7, 2, 5},\ - {1, 7, 2, 5},\ - {1, 7, 4, 5},\ - 2}) -#define case_3 concat_base_params({\ - {1, 7, 2, 5},\ - {1, 13, 2, 5},\ - {1, 20, 2, 5},\ - 1}) -#define case_4 concat_base_params({\ - {1, 7, 2, 13},\ - {1, 7, 2, 17},\ - {1, 7, 2, 30},\ - 3}) -#define case_5 concat_base_params({\ - {1, 8, 8, 16},\ - {1, 16, 8, 16},\ - {1, 24, 8, 16},\ - 1}) - -TEST_P(smoke_CPU_ConcatOnlyTest, TestsConcat) { -} - -std::string getTestCaseName(testing::TestParamInfo obj) { - return obj.param.device_name + - "_out_w" + std::to_string(obj.param.out.w) + - "_out_h" + std::to_string(obj.param.out.h) + - "_out_c" + std::to_string(obj.param.out.c) + - "_out_n" + std::to_string(obj.param.out.n); -} - -concat_test_params concat_only_test_cases[] = { - concat_test_params("CPU", case_1), - concat_test_params("CPU", case_2), - concat_test_params("CPU", case_3), - concat_test_params("CPU", case_4), - concat_test_params("CPU", case_5), -}; - -INSTANTIATE_TEST_CASE_P(TestConcat, smoke_CPU_ConcatOnlyTest, ::testing::ValuesIn(concat_only_test_cases), getTestCaseName); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_int8_tests.cpp 
b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_int8_tests.cpp deleted file mode 100644 index cf31c1e7be6..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_int8_tests.cpp +++ /dev/null @@ -1,363 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "../common_single_layer_tests/conv_ref.hpp" -#include -#include - -#include "common_test_utils/common_layers_params.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using std::vector; - -struct conv_base_params { - vector in_dims; - vector kernel; - vector strides; - vector pads_begin; - vector pads_end; - vector dilations; - - size_t out_c; - size_t grp_c; - - vector out_dims; -}; - -struct conv_test_params : conv_base_params { - std::string device_name; - - conv_test_params(std::string name, conv_base_params params) : - conv_base_params(params), device_name(name) {} -}; - -template -static void fill_int_data_even(data_t *data, size_t size, bool is_signed) { - for (size_t i = 0 ; i < size; i++) { - data[i] = (i * 13 % 21 - 10 * is_signed) * 2; - } -} - -template -static void fill_int_data(data_t *data, size_t size, bool is_signed) { - for (size_t i = 0 ; i < size; i++) { - data[i] = i * 13 % 21 - 10 * is_signed; - } -} - -template -class smoke_ConvolutionInt8OnlyTest : public TestsCommon, - public WithParamInterface { - - std::string model_t = (std::string)R"V0G0N( - - - - - - _INPUT_DIMS_ - - - - - - - - - - - - _INPUT_DIMS_ - - - - - _OUTPUT_DIMS_ - - - - - - - - -)V0G0N"; - -protected: - - size_t calculateOutDim(size_t in_dim, size_t kernel, size_t stride, size_t pad_begin) { - return (in_dim + 2lu * pad_begin - kernel) / stride + 1lu; - } - - void createBlobs(const conv_test_params &p, typename TBlob::Ptr &src, TBlob::Ptr &dst, TBlob::Ptr &dst_ref) { - auto in_size = p.in_dims.size(); - auto out_size = p.out_dims.size(); - SizeVector dims_dst = { - p.out_dims[out_size - 1] == 0 ? - calculateOutDim(p.in_dims[in_size - 1], p.kernel[X_AXIS], p.strides[X_AXIS], p.pads_begin[X_AXIS]) : p.out_dims[out_size - 1], - p.out_dims[out_size - 2] == 0 ? - calculateOutDim(p.in_dims[in_size - 2], p.kernel[Y_AXIS], p.strides[Y_AXIS], p.pads_begin[Y_AXIS]) : p.out_dims[out_size - 2], - p.out_c, - 1lu}; - SizeVector dims_src; - for (int i = in_size; i > 0; i--) { - dims_src.push_back(p.in_dims[i - 1]); - } - - Layout layout = NCHW; - if (in_size == 5) { - layout = NCDHW; - dims_dst.insert(dims_dst.begin() + 2, p.out_dims.size() > 2 ? - (p.out_dims[out_size - 3] == 0 ? - calculateOutDim(p.in_dims[in_size - 3], p.kernel[Z_AXIS], p.strides[Z_AXIS], p.pads_begin[Z_AXIS]) : p.out_dims[out_size - 3]) : 1lu); - } - - std::reverse(dims_src.begin(), dims_src.end()); - std::reverse(dims_dst.begin(), dims_dst.end()); - - Precision src_precision = (typeid(src_data_t) == typeid(int8_t)) ? Precision::I8 : Precision::U8; - src = make_shared_blob(TensorDesc({src_precision, dims_src, layout})); - src->allocate(); - - dst = make_shared_blob(TensorDesc({Precision::FP32, dims_dst, layout})); - dst->allocate(); - - dst_ref = make_shared_blob(TensorDesc({Precision::FP32, dims_dst, layout})); - dst_ref->allocate(); - } - - TBlob::Ptr fillWeights(const conv_test_params &p) { - auto KZ = p.kernel.size() > Z_AXIS ? 
p.kernel[Z_AXIS] : 1lu; - TBlob *weights_ptr = new TBlob(TensorDesc({Precision::U8, - {p.kernel[X_AXIS] * p.kernel[Y_AXIS] * KZ * p.out_c * p.in_dims[1] / p.grp_c * sizeof(uint8_t) - + p.out_c * sizeof(int32_t)}, C})); - weights_ptr->allocate(); - size_t bias_size = p.out_c; - size_t weights_size = (weights_ptr->size() - bias_size * sizeof(int32_t)) / sizeof(uint8_t); - int8_t *weights_data = (int8_t *) weights_ptr->buffer(); - auto *bias_data = (int32_t *)(weights_data + weights_size); - - if (typeid(src_data_t) == typeid(int8_t)) { - // If input data is signed, weight data is divided by 2 due to the specifics of implementation in mkl-dnn - fill_int_data_even(weights_data, weights_size, true); - } else { - fill_int_data(weights_data, weights_size, true); - } - fill_int_data(bias_data, bias_size, true); - - return TBlob::Ptr(weights_ptr); - } - - void calculateRef(const TBlob::Ptr &weights, const conv_test_params &p, const typename TBlob::Ptr &src, - TBlob::Ptr &dst_ref) { - const int8_t *weights_data = (const int8_t *) weights->buffer(); - size_t bias_size = p.out_c; - size_t weights_size = (weights->size() - bias_size * sizeof(int32_t)) / sizeof(uint8_t); - auto *bias_data = (const int32_t *)(weights_data + weights_size); - CommonTestUtils::conv_common_params params; - for (int i = 0; i < p.kernel.size(); i++) - params.kernel.insert(i, p.kernel[i]); - for (int i = 0; i < p.strides.size(); i++) - params.stride.insert(i, p.strides[i]); - for (int i = 0; i < p.pads_begin.size(); i++) - params.pads_begin.insert(i, p.pads_begin[i]); - for (int i = 0; i < p.dilations.size(); i++) - params.dilation.insert(i, p.dilations[i]); - params.group = p.grp_c; - params.out_c = p.out_c; - ref_conv_common<>({ src }, *dst_ref.get(), weights_data, weights_size, bias_data, bias_size, params); - } - - void SetUp() override { - try { - conv_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - typename TBlob::Ptr src; - TBlob::Ptr dst, dst_ref; - createBlobs(p, src, dst, dst_ref); - auto *src_data = src->cbuffer().template as(); - size_t src_size = src->size() / sizeof(src_data_t); - if (typeid(src_data_t) == typeid(int8_t)) { - fill_int_data(src_data, src_size, true); - } else { - fill_int_data(src_data, src_size, false); - } - - auto weights = fillWeights(p); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, weights); - - BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - outputBlobs[item.first] = dst; - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.device_name); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - inferRequest.SetInput(srcs); - inferRequest.SetOutput(outputBlobs); - inferRequest.Infer(); - - calculateRef(weights, p, src, dst_ref); - compare(*dst, *dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } - - virtual std::string getModel(conv_test_params p) { - std::string model = model_t; - - auto in_dims_size = p.in_dims.size(); - std::string input_dims = "" + std::to_string(p.in_dims[0]) + ""; - for (int i = 1; i < in_dims_size; i++) { - input_dims += "\n " + std::to_string(p.in_dims[i]) + ""; - } - REPLACE_WITH_STR(model, "_INPUT_DIMS_", input_dims); - - auto out_dims_size = p.out_dims.size(); - std::string output_dims = "" + std::to_string(p.in_dims[0]) + ""; - output_dims += "\n " + std::to_string(p.out_c) + ""; - if (out_dims_size > 2) { - 
size_t od = (p.out_dims[out_dims_size - 3] == 0 ? - calculateOutDim(p.in_dims[in_dims_size - 3], p.kernel[Z_AXIS], p.strides[Z_AXIS], p.pads_begin[Z_AXIS]) : p.out_dims[out_dims_size - 3]); - output_dims += "\n " + std::to_string(od) + ""; - } - size_t oh = p.out_dims[out_dims_size - 2] == 0 ? - calculateOutDim(p.in_dims[in_dims_size - 2], p.kernel[Y_AXIS], p.strides[Y_AXIS], p.pads_begin[Y_AXIS]) : p.out_dims[out_dims_size - 2]; - output_dims += "\n " + std::to_string(oh) + ""; - size_t ow = p.out_dims[out_dims_size - 1] == 0 ? - calculateOutDim(p.in_dims[in_dims_size - 1], p.kernel[X_AXIS], p.strides[X_AXIS], p.pads_begin[X_AXIS]) : p.out_dims[out_dims_size - 1]; - output_dims += "\n " + std::to_string(ow) + ""; - REPLACE_WITH_STR(model, "_OUTPUT_DIMS_", output_dims); - - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.kernel); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.strides); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_DL_", p.dilations); - - REPLACE_WITH_NUM(model, "_GC_", p.grp_c); - REPLACE_WITH_NUM(model, "_OC_", p.out_c); - - std::string ip = (typeid(src_data_t) == typeid(int8_t)) ? "I8" : "U8"; - REPLACE_WITH_STR(model, "_IP_", ip); - - size_t KD = p.kernel.size() > Z_AXIS ? p.kernel[Z_AXIS] : 1lu; - size_t w_data_size = (p.kernel[X_AXIS] * p.kernel[Y_AXIS] * KD * p.out_c * p.in_dims[1] / p.grp_c) * sizeof(uint8_t); - size_t b_data_size = p.out_c; - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - - return model; - } -}; - -// conv_base_params ({in_dims, kernel, strides, pads_begin, pads_end, dilations, out_c, grp_c, out_dims}) -// If out_dims are zero, they are calculated automatically. 
-// 2D -#define case_1 conv_base_params({{1, 9, 16, 32}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, 17, 1, {0, 0}}) -#define case_2 conv_base_params({{1, 9, 32, 16}, {2, 4}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, 17, 1, {0, 0}}) -#define case_3 conv_base_params({{1, 9, 32, 16}, {2, 4}, {2, 1}, {0, 0}, {0, 0}, {1, 1}, 17, 1, {0, 0}}) -#define case_4 conv_base_params({{1, 3, 40, 40}, {3, 3}, {1, 2}, {0, 0}, {0, 0}, {1, 1}, 20, 1, {0, 0}}) -#define case_5 conv_base_params({{1, 9, 16, 32}, {7, 7}, {2, 2}, {3, 3}, {0, 0}, {1, 1}, 17, 1, {0, 0}}) -#define case_6 conv_base_params({{1, 3, 224, 224}, {7, 7}, {2, 2}, {2, 2}, {0, 0}, {1, 1}, 64, 1, {111, 111}}) -#define case_7 conv_base_params({{1, 16, 40, 40}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, 16, 16, {0, 0}}) -#define case_8 conv_base_params({{1, 32, 16, 32}, {7, 7}, {2, 2}, {3, 3}, {0, 0}, {1, 1}, 32, 32, {0, 0}}) -#define case_9 conv_base_params({{1, 16, 40, 40}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {9, 9}, 16, 16, {0, 0}}) -#define case_10 conv_base_params({{1, 32, 16, 32}, {7, 7}, {2, 2}, {3, 3}, {0, 0}, {3, 3}, 32, 32, {2, 10}}) -#define case_11 conv_base_params({{1, 4, 16, 32}, {7, 7}, {2, 2}, {3, 3}, {0, 0}, {2, 2}, 4, 4, {5, 13}}) -#define case_12 conv_base_params({{1, 3, 224, 224}, {10, 10}, {1, 1}, {4, 4}, {0, 0}, {1, 1}, 4, 1, {223, 223}}) -#define case_13 conv_base_params({{1, 32, 1, 15000}, {11, 1}, {1, 1}, {20, 0}, {0, 0}, {4, 1}, 32, 1, {1, 15000}}) -#define case_14 conv_base_params({{1, 16, 40, 40}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, 16, 8, {0, 0}}) -#define case_15 conv_base_params({{1, 16, 40, 40}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, 8, 2, {0, 0}}) -#define case_16 conv_base_params({{1, 3, 40, 40}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, 9, 3, {0, 0}}) -// 3D -#define case_3d_0 conv_base_params({{1, 3, 16, 32, 32}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, 17, 1, {0, 0, 0}}) -#define case_3d_1 conv_base_params({{1, 3, 16, 32, 32}, {3, 3, 3}, {2, 2, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, 64, 1, {0, 0, 0}}) -#define case_3d_2 conv_base_params({{1, 32, 8, 8, 8}, {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, 32, 32, {0, 0, 0}}) -#define case_3d_3 conv_base_params({{1, 32, 10, 10, 10}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, 32, 32, {0, 0, 0}}) -#define case_3d_4 conv_base_params({{1, 32, 8, 8, 8}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, 32, 32, {0, 0, 0}}) -#define case_3d_5 conv_base_params({{1, 32, 8, 8, 8}, {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, 16, 16, {0, 0, 0}}) -#define case_3d_6 conv_base_params({{1, 32, 10, 10, 10}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, 16, 8, {0, 0, 0}}) -#define case_3d_7 conv_base_params({{1, 4, 8, 8, 8}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, 16, 4, {0, 0, 0}}) - -using smoke_conv_u8s32 = smoke_ConvolutionInt8OnlyTest; - -TEST_P(smoke_conv_u8s32, TestsConvolution) { -} - -std::string getTestCaseInt8Name(testing::TestParamInfo obj) { - auto in_dims_size = obj.param.in_dims.size(); - return obj.param.device_name + - "_w" + std::to_string(obj.param.in_dims[in_dims_size - 1]) + - "_h" + std::to_string(obj.param.in_dims[in_dims_size - 2]) + - (obj.param.in_dims.size() > 4 ? "_d" + std::to_string(obj.param.in_dims[in_dims_size - 3]) : "") + - "_c" + std::to_string(obj.param.in_dims[1]) + - "_kw" + std::to_string(obj.param.kernel[X_AXIS]) + - "_kh" + std::to_string(obj.param.kernel[Y_AXIS]) + - (obj.param.kernel.size() > Z_AXIS ? 
"_kd" + std::to_string(obj.param.kernel[Z_AXIS]) : "") + - "_sw" + std::to_string(obj.param.strides[X_AXIS]) + - "_sh" + std::to_string(obj.param.strides[Y_AXIS]) + - (obj.param.strides.size() > Z_AXIS ? "_sd" + std::to_string(obj.param.strides[Z_AXIS]) : "") + - "_dilw" + std::to_string(obj.param.dilations[X_AXIS]) + - "_dilh" + std::to_string(obj.param.dilations[Y_AXIS]) + - (obj.param.dilations.size() > Z_AXIS ? "_dild" + std::to_string(obj.param.dilations[Z_AXIS]) : "") + - "_grpc" + std::to_string(obj.param.grp_c); -} - -conv_test_params conv_only_int8_test_cases[] = { - conv_test_params("CPU", case_1), - conv_test_params("CPU", case_2), - conv_test_params("CPU", case_3), - conv_test_params("CPU", case_4), - conv_test_params("CPU", case_5), - conv_test_params("CPU", case_6), -//// todo: it does not work on AVX-512 -// conv_test_params("CPU", case_7), -// conv_test_params("CPU", case_8), -// conv_test_params("CPU", case_9), -// conv_test_params("CPU", case_10), -// conv_test_params("CPU", case_11), - conv_test_params("CPU", case_12), - conv_test_params("CPU", case_13), - conv_test_params("CPU", case_14), - conv_test_params("CPU", case_15), - conv_test_params("CPU", case_16), -}; - -conv_test_params conv_only_int8_3d_test_cases[] = { - conv_test_params("CPU", case_3d_0), - conv_test_params("CPU", case_3d_1), - conv_test_params("CPU", case_3d_2), - conv_test_params("CPU", case_3d_3), - conv_test_params("CPU", case_3d_4), - conv_test_params("CPU", case_3d_5), - conv_test_params("CPU", case_3d_6), - conv_test_params("CPU", case_3d_7), -}; - -INSTANTIATE_TEST_CASE_P( - TestConvolution, smoke_conv_u8s32, ::testing::ValuesIn(conv_only_int8_test_cases), getTestCaseInt8Name); - -INSTANTIATE_TEST_CASE_P( - TestConvolution_3d, smoke_conv_u8s32, ::testing::ValuesIn(conv_only_int8_3d_test_cases), getTestCaseInt8Name); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests.cpp deleted file mode 100644 index 490e155f6b3..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests.cpp +++ /dev/null @@ -1,429 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "../common_single_layer_tests/conv_ref.hpp" -#include -#include -#include "common_test_utils/common_layers_params.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using std::vector; - -struct conv_base_params { - vector in_dims; - vector kernel; - vector strides; - vector pads_begin; - vector pads_end; - vector dilations; - - size_t out_c; - size_t grp_c; - - vector out_dims; -}; - -struct conv_test_params : conv_base_params { - std::string device_name; - - conv_test_params(std::string name, conv_base_params params) : - conv_base_params(params), device_name(name) {} -}; - -class smoke_ConvolutionOnlyTest : public TestsCommon, - public WithParamInterface { - - std::string model_t_4D = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _OC_ - _OH_ - _OW_ - - - - - - - - -)V0G0N"; - - std::string model_t_5D = R"V0G0N( - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - _IN_ - _OC_ - _OD_ - _OH_ - _OW_ - - - - - - - - -)V0G0N"; - -protected: - - size_t calculateOutDim(size_t in_dim, size_t kernel, 
size_t stride, size_t pad_begin) { - return (in_dim + 2lu * pad_begin - kernel) / stride + 1lu; - } - - void createBlobs(const conv_test_params &p, TBlob::Ptr &src, TBlob::Ptr &dst, TBlob::Ptr &dst_ref) { - auto in_size = p.in_dims.size(); - auto out_size = p.out_dims.size(); - SizeVector dims_dst = { - p.out_dims[out_size - 1] == 0 ? - calculateOutDim(p.in_dims[in_size - 1], p.kernel[X_AXIS], p.strides[X_AXIS], p.pads_begin[X_AXIS]) : p.out_dims[out_size - 1], - p.out_dims[out_size - 2] == 0 ? - calculateOutDim(p.in_dims[in_size - 2], p.kernel[Y_AXIS], p.strides[Y_AXIS], p.pads_begin[Y_AXIS]) : p.out_dims[out_size - 2], - p.out_c, - 1lu}; - SizeVector dims_src; - for (int i = in_size; i > 0; i--) { - dims_src.push_back(p.in_dims[i - 1]); - } - - Layout layout = NCHW; - if (in_size == 5) { - layout = NCDHW; - dims_dst.insert(dims_dst.begin() + 2, p.out_dims.size() > 2 ? - (p.out_dims[out_size - 3] == 0 ? - calculateOutDim(p.in_dims[in_size - 3], p.kernel[Z_AXIS], p.strides[Z_AXIS], p.pads_begin[Z_AXIS]) : p.out_dims[out_size - 3]) : 1lu); - } - - src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), layout)); - src->allocate(); - - dst = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), layout)); - dst->allocate(); - - dst_ref = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), layout)); - dst_ref->allocate(); - } - - TBlob::Ptr fillWeights(const conv_test_params &p) { - auto KZ = p.kernel.size() > Z_AXIS ? p.kernel[Z_AXIS] : 1lu; - TBlob *weights_ptr = new TBlob(TensorDesc(Precision::U8, - {(p.kernel[X_AXIS] * p.kernel[Y_AXIS] * KZ * p.out_c * p.in_dims[1] / p.grp_c + p.out_c) - * sizeof(float)}, C)); - weights_ptr->allocate(); - fill_data((float *) weights_ptr->buffer(), weights_ptr->size() / sizeof(float)); - return TBlob::Ptr(weights_ptr); - } - - void calculateRef(const TBlob::Ptr &weights, const conv_test_params &p, const TBlob::Ptr &src, - TBlob::Ptr &dst_ref) { - const float *weights_data = (const float *) weights->buffer(); - size_t bias_size = p.out_c; - size_t weights_size = weights->size() / sizeof(float) - bias_size; - const float *bias_data = weights_data + weights_size; - CommonTestUtils::conv_common_params params; - for (int i = 0; i < p.kernel.size(); i++) - params.kernel.insert(i, p.kernel[i]); - for (int i = 0; i < p.strides.size(); i++) - params.stride.insert(i, p.strides[i]); - for (int i = 0; i < p.pads_begin.size(); i++) - params.pads_begin.insert(i, p.pads_begin[i]); - for (int i = 0; i < p.dilations.size(); i++) - params.dilation.insert(i, p.dilations[i]); - params.group = p.grp_c; - params.out_c = p.out_c; - ref_conv_common<>({ src }, *dst_ref.get(), weights_data, weights_size, bias_data, bias_size, params); - } - - CNNNetwork getNetwork(const TBlob::Ptr &weights, const conv_test_params &p) { - Core ie; - return ie.ReadNetwork(getModel(p), weights); - } - - virtual void - infer(CNNNetwork &network, const conv_test_params &p, TBlob::Ptr &src, TBlob::Ptr &dst) { - Blob::Ptr srcPtr = std::shared_ptr(src); - Blob::Ptr dstPtr = std::shared_ptr(dst); - - Core ie; - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, srcPtr); - 
inferRequest.SetBlob(outInfo.begin()->first, dstPtr); - inferRequest.Infer(); - } - - void SetUp() override { - try { - conv_test_params p = ::testing::WithParamInterface::GetParam(); - TBlob::Ptr src, dst, dst_ref; - createBlobs(p, src, dst, dst_ref); - fill_data(src->data(), src->size()); - auto weights = fillWeights(p); - calculateRef(weights, p, src, dst_ref); - CNNNetwork network = getNetwork(weights, p); - infer(network, p, src, dst); - compare(*dst, *dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } - - virtual std::string getModel(conv_test_params p) { - std::string model; - auto in_dims_size = p.in_dims.size(); - if (in_dims_size == 4) { - model = model_t_4D; - } else if (in_dims_size == 5) { - model = model_t_5D; - } - - REPLACE_WITH_NUM(model, "_IW_", p.in_dims[in_dims_size - 1]); - REPLACE_WITH_NUM(model, "_IH_", p.in_dims[in_dims_size - 2]); - REPLACE_WITH_NUM(model, "_ID_", p.in_dims[in_dims_size - 3]); - REPLACE_WITH_NUM(model, "_IC_", p.in_dims[1]); - REPLACE_WITH_NUM(model, "_IN_", p.in_dims[0]); - - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.kernel); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.strides); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.pads_end); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_DL_", p.dilations); - - auto out_dims_size = p.out_dims.size(); - REPLACE_WITH_NUM(model, "_GC_", p.grp_c); - REPLACE_WITH_NUM(model, "_OC_", p.out_c); - REPLACE_WITH_NUM(model, "_OD_", out_dims_size > 2 ? - (p.out_dims[out_dims_size - 3] == 0 ? - calculateOutDim(p.in_dims[in_dims_size - 3], p.kernel[Z_AXIS], p.strides[Z_AXIS], p.pads_begin[Z_AXIS]) : p.out_dims[out_dims_size - 3]) : - 1lu); - REPLACE_WITH_NUM(model, "_OH_", p.out_dims[out_dims_size - 2] == 0 ? - calculateOutDim(p.in_dims[in_dims_size - 2], p.kernel[Y_AXIS], p.strides[Y_AXIS], p.pads_begin[Y_AXIS]) : p.out_dims[out_dims_size - 2]); - REPLACE_WITH_NUM(model, "_OW_", p.out_dims[out_dims_size - 1] == 0 ? - calculateOutDim(p.in_dims[in_dims_size - 1], p.kernel[X_AXIS], p.strides[X_AXIS], p.pads_begin[X_AXIS]) : p.out_dims[out_dims_size - 1]); - - size_t KD = p.kernel.size() > Z_AXIS ? 
p.kernel[Z_AXIS] : 1lu; - size_t w_data_size = (p.kernel[X_AXIS] * p.kernel[Y_AXIS] * KD * p.out_c * p.in_dims[1] / p.grp_c) * sizeof(float); - size_t b_data_size = p.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - return model; - } -}; - -class smoke_ConvolutionReshapeTest : public smoke_ConvolutionOnlyTest { -protected: - void SetUp() override { - try { - conv_test_params p = ::testing::WithParamInterface::GetParam(); - TBlob::Ptr src, dst, dst_ref; - auto weights = fillWeights(p); - CNNNetwork network = getNetwork(weights, p); - infer(network, p, src, dst); - updatePaddings(network, p); - dst_ref = std::make_shared>(dst->getTensorDesc()); - dst_ref->allocate(); - calculateRef(weights, p, src, dst_ref); - compare(*dst, *dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } - - void updatePaddings(const CNNNetwork &network, conv_test_params& p) { - details::CNNNetworkIterator i(network), end; - auto found = std::find_if(i, end, [](const CNNLayer::Ptr& layer) { - return layer->type == "Convolution"; - }); - ASSERT_NE(found, end); - auto convLayer = std::dynamic_pointer_cast(*found); - auto allPad = getPaddings(*convLayer.get()); - p.pads_begin[X_AXIS] = allPad.begin[X_AXIS]; - p.pads_begin[Y_AXIS] = allPad.begin[Y_AXIS]; - if (p.pads_begin.size() > Z_AXIS) - p.pads_begin[Z_AXIS] = allPad.begin[Z_AXIS]; - } - void - infer(CNNNetwork &network, const conv_test_params &p, TBlob::Ptr &src, TBlob::Ptr &dst) override { - Core ie; - auto firstInputInfo = *network.getInputsInfo().begin(); - std::string inputName = firstInputInfo.first; - auto firstOutputInfo = *network.getOutputsInfo().begin(); - std::string outputName = firstOutputInfo.first; - auto inputShapes = network.getInputShapes(); - IE_ASSERT(inputShapes.size() == 1); - inputShapes.begin()->second = p.in_dims; - ASSERT_NO_THROW(network.reshape(inputShapes)); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.device_name); - InferRequest request = exeNetwork.CreateInferRequest(); - Blob::Ptr src_b = request.GetBlob(inputName); - - src = std::dynamic_pointer_cast>(src_b); - fill_data(src->data(), src->size()); - request.Infer(); - Blob::Ptr dst_b = request.GetBlob(outputName); - dst = std::dynamic_pointer_cast>(dst_b); - } - - std::string getModel(conv_test_params p) override { - std::string model = smoke_ConvolutionOnlyTest::getModel(p); - REPLACE_WITH_STR(model, "convolution", "convolution auto_pad=\"same_upper\""); - std::string pads_pattern = "pads_begin=\""; - for (int i = p.pads_begin.size(); i > 0; i--) { - pads_pattern += std::to_string(p.pads_begin[i - 1]) + ","; - } - std::string pads = "pads_begin=\"0,0\""; - if (p.pads_begin.size() == 3) { - pads = "pads_begin=\"0,0,0\""; - } - REPLACE_WITH_NUM_VECTOR(model, pads_pattern, pads); - return model; - } -}; - -#define case_1 conv_base_params({{1lu, 9lu, 16lu, 32lu}, {1lu, 1lu}, {1lu, 1lu}, {0lu, 0lu}, {0lu, 0lu}, {1lu, 1lu}, 17lu, 1lu, {0lu, 0lu}}) -#define case_2 conv_base_params({{1lu, 9lu, 32lu, 16lu}, {2lu, 4lu}, {1lu, 1lu}, {0lu, 0lu}, {0lu, 0lu}, {1lu, 1lu}, 17lu, 1lu, {0lu, 0lu}}) -#define case_3 conv_base_params({{1lu, 9lu, 32lu, 16lu}, {2lu, 4lu}, {2lu, 1lu}, {0lu, 0lu}, {0lu, 0lu}, {1lu, 1lu}, 17lu, 1lu, {0lu, 0lu}}) -#define case_4 conv_base_params({{1lu, 3lu, 40lu, 40lu}, {3lu, 3lu}, {1lu, 2lu}, {0lu, 0lu}, {0lu, 0lu}, {1lu, 1lu}, 20lu, 1lu, {0lu, 0lu}}) -#define case_5 conv_base_params({{1lu, 9lu, 16lu, 32lu}, {7lu, 7lu}, {2lu, 2lu}, {3lu, 3lu}, {0lu, 
0lu}, {1lu, 1lu}, 17lu, 1lu, {0lu, 0lu}}) -#define case_6 conv_base_params({{1lu, 3lu, 224lu, 224lu}, {7lu, 7lu}, {2lu, 2lu}, {2lu, 2lu}, {0lu, 0lu}, {1lu, 1lu}, 64lu, 1lu, {112lu, 112lu}}) -#define case_7 conv_base_params({{1lu, 16lu, 40lu, 40lu}, {3lu, 3lu}, {1lu, 1lu}, {0lu, 0lu}, {0lu, 0lu}, {1lu, 1lu}, 16lu, 16lu, {0lu, 0lu}}) -#define case_8 conv_base_params({{1lu, 32lu, 16lu, 32lu}, {7lu, 7lu}, {2lu, 2lu}, {3lu, 3lu}, {0lu, 0lu}, {1lu, 1lu}, 32lu, 32lu, {0lu, 0lu}}) -#define case_9 conv_base_params({{1lu, 16lu, 40lu, 40lu}, {3lu, 3lu}, {1lu, 1lu}, {0lu, 0lu}, {0lu, 0lu}, {9lu, 9lu}, 16lu, 16lu, {0lu, 0lu}}) -#define case_10 conv_base_params({{1lu, 32lu, 16lu, 32lu}, {7lu, 7lu}, {2lu, 2lu}, {3lu, 3lu}, {0lu, 0lu}, {9lu, 9lu}, 32lu, 32lu, {0lu, 0lu}}) -#define case_11 conv_base_params({{1lu, 4lu, 16lu, 32lu}, {7lu, 7lu}, {2lu, 2lu}, {3lu, 3lu}, {0lu, 0lu}, {9lu, 9lu}, 4lu, 4lu, {0lu, 0lu}}) -#define case_12 conv_base_params({{1lu, 3lu, 224lu, 224lu}, {10lu, 10lu}, {1lu, 1lu}, {4lu, 4lu}, {0lu, 0lu}, {1lu, 1lu}, 4lu, 1lu, {224lu, 224lu}}) -#define case_13 conv_base_params({{1lu, 32lu, 1lu, 15000lu}, {11lu, 1lu}, {1lu, 1lu}, {5lu, 0lu}, {0lu, 0lu}, {4lu, 1lu}, 32lu, 1lu, {15000lu, 1lu}}) - - -#define case_14 conv_base_params({{1lu, 3lu, 16lu, 32lu, 32lu}, {1lu, 1lu, 1lu}, {1lu, 1lu, 1lu}, {0lu, 0lu, 0lu}, {0lu, 0lu, 0lu}, {1lu, 1lu, 1lu}, 17lu, 1lu, {0lu, 0lu, 0lu}}) -#define case_15 conv_base_params({{1lu, 3lu, 16lu, 32lu, 32lu}, {3lu, 3lu, 3lu}, {2lu, 2lu, 1lu}, {0lu, 0lu, 0lu}, {0lu, 0lu, 0lu}, {1lu, 1lu, 1lu}, 64lu, 1lu, {0lu, 0lu, 0lu}}) - -// NOTE: always auto_pad = same_upper. IR with zero_pads, pad from params is used for ref_conv after reshape -#define case_si_1 conv_base_params({{1lu, 144lu, 75lu, 75lu}, {3lu, 3lu}, {2lu, 2lu}, {1lu, 1lu}, {0lu, 0lu}, {1lu, 1lu}, 144lu, 144lu, {1lu, 1lu}}) - -// TODO: rewrite to ngraph to have reshape functionality -TEST_P(smoke_ConvolutionReshapeTest, DISABLED_TestsReshapeConvolution) { -} - -std::string getTestCaseName(testing::TestParamInfo obj) { - auto in_dims_size = obj.param.in_dims.size(); - return obj.param.device_name + - "_w" + std::to_string(obj.param.in_dims[in_dims_size - 1]) + - "_h" + std::to_string(obj.param.in_dims[in_dims_size - 2]) + - (obj.param.in_dims.size() > 4 ? "_d" + std::to_string(obj.param.in_dims[in_dims_size - 3]) : "") + - "_c" + std::to_string(obj.param.in_dims[1]) + - "_kw" + std::to_string(obj.param.kernel[X_AXIS]) + - "_kh" + std::to_string(obj.param.kernel[Y_AXIS]) + - (obj.param.kernel.size() > Z_AXIS ? "_kd" + std::to_string(obj.param.kernel[Z_AXIS]) : "") + - "_sw" + std::to_string(obj.param.strides[X_AXIS]) + - "_sh" + std::to_string(obj.param.strides[Y_AXIS]) + - (obj.param.strides.size() > Z_AXIS ? "_sd" + std::to_string(obj.param.strides[Z_AXIS]) : "") + - "_dilw" + std::to_string(obj.param.dilations[X_AXIS]) + - "_dilh" + std::to_string(obj.param.dilations[Y_AXIS]) + - (obj.param.dilations.size() > Z_AXIS ? 
"_dild" + std::to_string(obj.param.dilations[Z_AXIS]) : "") + - "_grpc" + std::to_string(obj.param.grp_c); -} - -conv_test_params conv_only_test_cases[] = { - conv_test_params("CPU", case_1), - conv_test_params("CPU", case_2), - conv_test_params("CPU", case_3), - conv_test_params("CPU", case_4), - conv_test_params("CPU", case_5), - conv_test_params("CPU", case_6), - conv_test_params("CPU", case_7), - conv_test_params("CPU", case_8), - conv_test_params("CPU", case_9), - conv_test_params("CPU", case_10), - conv_test_params("CPU", case_11), - conv_test_params("CPU", case_12), - conv_test_params("CPU", case_13), - conv_test_params("CPU", case_14), - conv_test_params("CPU", case_15) -}; - -INSTANTIATE_TEST_CASE_P( - TestConvolution, smoke_ConvolutionOnlyTest, ::testing::ValuesIn(conv_only_test_cases), getTestCaseName); - -INSTANTIATE_TEST_CASE_P( - TestSameUpperConvolution, smoke_ConvolutionReshapeTest, - ::testing::Values(conv_test_params("CPU", case_si_1)), - getTestCaseName); - diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests_int8.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests_int8.cpp deleted file mode 100644 index 2ca739cab5e..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/conv_tests_int8.cpp +++ /dev/null @@ -1,452 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" - -#include - -#include -#include "common_test_utils/data_utils.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace InferenceEngine::details; - -struct conv_int8_base_params { - struct { - size_t w; - size_t h; - size_t c; - } in; - - size_t krn_w; - size_t krn_h; - size_t str_w; - size_t str_h; - size_t pad_w; - size_t pad_h; - size_t dil_w; - size_t dil_h; - - size_t out_c; - size_t grp_c; - - struct { - size_t w; - size_t h; - } out; -}; - -struct conv_test_int8_params : conv_int8_base_params { - std::string device_name; - - conv_test_int8_params(std::string name, conv_int8_base_params params) : - conv_int8_base_params(params), device_name(name) {} -}; - -template -void ref_conv_relu(const TBlob &src, const data_t *weights, const size_t weightsSize, - TBlob &dst, conv_test_int8_params prm) { - size_t KW = prm.krn_w; - size_t KH = prm.krn_h; - size_t GC = prm.grp_c; - - size_t IW = src.getTensorDesc().getDims()[3]; - size_t IH = src.getTensorDesc().getDims()[2]; - size_t IC = src.getTensorDesc().getDims()[1]; - - size_t OW = prm.out.w == 0 ? (IW + 2 * prm.pad_w - prm.krn_w) / prm.str_w + 1 : prm.out.w; - size_t OH = prm.out.h == 0 ? 
(IH + 2 * prm.pad_h - prm.krn_h) / prm.str_h + 1 : prm.out.h; - size_t OC = prm.out_c; - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights; - const data_t *bias_data = weights_data + KW * KH * OC * IC / GC; - data_t *dst_data = dst.data(); - - IE_ASSERT(KW * KH * OC * IC / GC + OC == weightsSize); - IE_ASSERT(OW == dst.getTensorDesc().getDims()[3]); - IE_ASSERT(OH == dst.getTensorDesc().getDims()[2]); - - for (uint32_t g = 0; g < GC; g++) { - for (uint32_t oc = 0; oc < OC / GC; oc++) { - for (uint32_t oh = 0; oh < OH; oh++) { - for (uint32_t ow = 0; ow < OW; ow++) { - size_t oidx = g * OC / GC * OH * OW - + oc * OH * OW + oh * OW + ow; - dst_data[oidx] = bias_data[g * OC / GC + oc]; - - for (size_t ic = 0; ic < IC / GC; ic++) { - for (size_t kh = 0; kh < KH; kh++) { - for (size_t kw = 0; kw < KW; kw++) { - int32_t iw = ow * prm.str_w - prm.pad_w + kw * (1 + prm.dil_w); - int32_t ih = oh * prm.str_h - prm.pad_h + kh * (1 + prm.dil_h); - if (iw < 0 || iw >= (int32_t)IW || ih < 0 - || ih >= (int32_t)IH) - continue; - size_t iidx = g * IC / GC * IH * IW - + ic * IH * IW + ih * IW + iw; - size_t widx = g * OC / GC * IC / GC * KH * KW - + oc * IC / GC * KH * KW - + ic * KH * KW + kh * KW + kw; - - dst_data[ oidx] += src_data[iidx] * weights_data[widx]; - } - } - } - - // Applying ReLU - if (dst_data[oidx] < 0) dst_data[oidx] = 0; - - } - } - } - } -} - -class smoke_ConvolutionInt8Test: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - 1 - _IC_ - _IH_ - _IW_ - - - - - - - - - - - - 1 - _IC_ - _IH_ - _IW_ - - - - - 1 - _OC_ - _OH_ - _OW_ - - - - - - - 1 - _OC_ - _OH_ - _OW_ - - - - - 1 - _OC_ - _OH_ - _OW_ - - - - - - - - - -)V0G0N"; - - std::string getModel(conv_test_int8_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - - REPLACE_WITH_NUM(model, "_KW_", p.krn_w); - REPLACE_WITH_NUM(model, "_KH_", p.krn_h); - REPLACE_WITH_NUM(model, "_SW_", p.str_w); - REPLACE_WITH_NUM(model, "_SH_", p.str_h); - REPLACE_WITH_NUM(model, "_PW_", p.pad_w); - REPLACE_WITH_NUM(model, "_PH_", p.pad_h); - - REPLACE_WITH_NUM(model, "_GC_", p.grp_c); - REPLACE_WITH_NUM(model, "_OC_", p.out_c); - REPLACE_WITH_NUM(model, "_OH_", p.out.h == 0 ? (p.in.h + 2 * p.pad_h - p.krn_h) / p.str_h + 1 : p.out.h); - REPLACE_WITH_NUM(model, "_OW_", p.out.w == 0 ? 
(p.in.w + 2 * p.pad_w - p.krn_w) / p.str_w + 1 : p.out.w); - - size_t w_data_size = (p.krn_w * p.krn_h * p.out_c * p.in.c / p.grp_c )* sizeof(float); - size_t b_data_size = p.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - return model; - } - -protected: - const char* DEFAULT_PATH_P = "./lib"; - - static void compare_NRMSD(InferenceEngine::Blob &res, InferenceEngine::Blob &ref, float max_nrmsd = 0.01f) { - float *res_ptr = res.buffer().as(); - size_t res_size = res.size(); - - float *ref_ptr = ref.buffer().as(); - size_t ref_size = ref.size(); - - ASSERT_EQ(res_size, ref_size); - - float sum = 0; - - float mmin = ref_ptr[0], mmax = ref_ptr[0]; - - for (size_t i = 0; i < ref_size; i++) { - float sqr = (ref_ptr[i] - res_ptr[i]); - sqr *= sqr; - sum += sqr; - - mmin = (std::min)(mmin, ref_ptr[i]); - mmax = (std::max)(mmax, ref_ptr[i]); - - if (i % 10007 == 0) { - std::cout << i << ": " << res_ptr[i] << "\t" << ref_ptr[i] << "\t" << "\tdiv: " << ref_ptr[i] / res_ptr[i] << std::endl; - } - - } - sum /= ref_size; - - sum = pow(sum, 0.5f); - - sum /= mmax - mmin; - - ASSERT_LE(sum, max_nrmsd); - } - - virtual void SetUp() { - try { - conv_test_int8_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - TBlob *weights = new TBlob(TensorDesc(Precision::U8, {(p.krn_w * p.krn_h * p.out_c * p.in.c / p.grp_c + p.out_c) - * sizeof(float)}, C)); - weights->allocate(); - - //fill_data_sine((float *) weights->buffer(), weights->size() / sizeof(float), 0.00, 0.005, 0.1); - CommonTestUtils::fill_data_sine((float *) weights->buffer(), weights->size() / sizeof(float), 1, 4, 0.3); - //fill_data_dbgval((float *) weights->buffer(), weights->size() / sizeof(float)); - //size_t bias_start = p.krn_w * p.krn_h * p.out_c * p.in.c / p.grp_c; - //fill_data_const((float *) weights->buffer() + bias_start, p.out_c, 0.00); - - // Set biases to 0 - /*for (int i = weights->size() / sizeof(float) - C - 1; i < weights->size() / sizeof(float); i++) { - ((float *) weights->buffer())[i] = 0; - }*/ - - - TBlob::Ptr weights_ptr = TBlob::Ptr(weights); - - // Collecting statistics - - // TODO Load nodes stats from file - std::string imageFilename = TestDataHelpers::get_data_path() + "/validation_set/224x224/dog.bmp"; - std::cout << "Using image file: " << imageFilename << std::endl; - - Core ie; - auto network = ie.ReadNetwork(model, weights_ptr); - - SizeVector dims_dst = {p.out.w == 0 ? (p.in.w + 2 * p.pad_w - p.krn_w) / p.str_w + 1 : p.out.w, - p.out.h == 0 ? (p.in.h + 2 * p.pad_h - p.krn_h) / p.str_h + 1 : p.out.h, - p.out_c, - 1}; - Blob::Ptr dst = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst->allocate(); - - // Setting the statistics data - - CNNNetwork myNetwork = ie.ReadNetwork(model, weights_ptr); - - SizeVector dims_src = {p.in.w, - p.in.h, - p.in.c, - 1}; // 1 is a batch size - Blob::Ptr src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - - - - - - std::vector imageNames = { imageFilename }; - - /** Taking information about all topology inputs **/ - InputsDataMap inputInfo(myNetwork.getInputsInfo()); - - if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies only with 1 input"); - auto inputInfoItem = *inputInfo.begin(); - - /** Specifying the precision of input data provided by the user. 
- * This should be called before load of the network to the plugin **/ - inputInfoItem.second->setPrecision(Precision::FP32); - inputInfoItem.second->setLayout(Layout::NCHW); - - - std::vector> imagesData; - for (auto & i : imageNames) { - FormatReader::ReaderPtr reader(i.c_str()); - if (reader.get() == nullptr) { - std::cout << "Image " + i + " cannot be read!" << std::endl; - continue; - } - /** Store image data **/ - SizeVector dims = inputInfoItem.second->getTensorDesc().getDims(); - std::shared_ptr data(reader->getData(dims.back(), dims.at(dims.size() - 2))); - if (data.get() != nullptr) { - imagesData.push_back(data); - } - } - if (imagesData.empty()) throw std::logic_error("Valid input images were not found!"); - - OutputsDataMap outputInfo(myNetwork.getOutputsInfo()); - for (auto itOut : outputInfo) { - itOut.second->setPrecision(Precision::FP32); - } - - /** Filling input tensor with images. First b channel, then g and r channels **/ - size_t num_chanels = src->getTensorDesc().getDims()[1]; - size_t image_size = src->getTensorDesc().getDims()[2] * src->getTensorDesc().getDims()[3]; - - float* data = src->buffer().as::value_type*>(); - - /** Iterate over all input images **/ - for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) { - /** Iterate over all pixel in image (b,g,r) **/ - for (size_t pid = 0; pid < image_size; pid++) { - /** Iterate over all channels **/ - for (size_t ch = 0; ch < num_chanels; ++ch) { - /** [images stride + channels stride + pixel id ] all in bytes **/ - data[image_id * image_size * num_chanels + ch * image_size + pid ] = (float)(imagesData.at(image_id).get()[pid*num_chanels + ch]); - } - } - } - - // Inferring the converted network and comparing the result with the reference - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.device_name); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - - std::cout << "Inferring int8" << std::endl; - inferRequest.Infer(); - - // Calculating FP32 reference - TBlob dst_ref(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst_ref.allocate(); - auto * srcPtr = dynamic_cast*>(src.get()); - ref_conv_relu(*srcPtr, (const float *)weights->buffer(), weights->size() / sizeof(float), dst_ref, p); - - // Comparing the result with the reference - compare_NRMSD(*dst, dst_ref, 0.17); - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -/* - struct { - size_t w; - size_t h; - size_t c; - } in; - - size_t krn_w; - size_t krn_h; - size_t str_w; - size_t str_h; - size_t pad_w; - size_t pad_h; - size_t dil_w; - size_t dil_h; - - size_t out_c; - size_t grp_c; - - struct { - size_t w; - size_t h; - } out; -*/ -// Wo=(Wi−F+2P)/S+1 - -#define case_1 conv_int8_base_params({{4, 4, 3}, 1, 1, 1, 1, 0, 0, 0, 0, 3, 1}) -#define case_2 conv_int8_base_params({{16, 32, 3}, 2, 4, 1, 1, 0, 0, 0, 0, 17, 1}) -#define case_3 conv_int8_base_params({{16, 32, 3}, 2, 4, 2, 1, 0, 0, 0, 0, 17, 1}) -#define case_4 conv_int8_base_params({{40, 40, 3}, 3, 3, 1, 2, 0, 0, 0, 0, 20, 1}) -#define case_5 conv_int8_base_params({{32, 16, 3}, 7, 7, 2, 2, 3, 3, 0, 0, 17, 1}) -#define case_6 conv_int8_base_params({{224, 224, 3}, 7, 7, 2, 2, 2, 2, 0, 0, 64, 1, {112, 112}}) -/*#define case_7 conv_int8_base_params({{40, 40, 16}, 
3, 3, 1, 1, 0, 0, 0, 0, 16, 16}) -#define case_8 conv_int8_base_params({{32, 16, 32}, 7, 7, 2, 2, 3, 3, 0, 0, 32, 32})*/ - -// These tests use dilated convolution and don't work yet -/*#define case_9 conv_int8_base_params({{40, 40, 16}, 3, 3, 1, 1, 0, 0, 8, 8, 16, 16}) -#define case_10 conv_int8_base_params({{32, 16, 32}, 7, 7, 2, 2, 3, 3, 8, 8, 32, 32}) -#define case_11 conv_int8_base_params({{32, 16, 4}, 7, 7, 2, 2, 3, 3, 8, 8, 4, 4})*/ - -TEST_P(smoke_ConvolutionInt8Test, TestsConvolution) { -} - -std::string getTestCaseName(testing::TestParamInfo obj) { - return obj.param.device_name + - "_w" + std::to_string(obj.param.in.w) + - "_h" + std::to_string(obj.param.in.h) + - "_c" + std::to_string(obj.param.in.c) + - "_krnw" + std::to_string(obj.param.krn_w) + - "_krnh" + std::to_string(obj.param.krn_h) + - "_strw" + std::to_string(obj.param.str_w) + - "_strh" + std::to_string(obj.param.str_h) + - "_dilw" + std::to_string(obj.param.dil_w) + - "_dilh" + std::to_string(obj.param.dil_h) + - "_grpc" + std::to_string(obj.param.grp_c); -} - -conv_test_int8_params conv_int8_test_cases[] = { - conv_test_int8_params("CPU", case_1), - conv_test_int8_params("CPU", case_2), - conv_test_int8_params("CPU", case_3), - conv_test_int8_params("CPU", case_4), - conv_test_int8_params("CPU", case_5), - // conv_test_int8_params("CPU", case_6), - //conv_test_int8_params("CPU", case_7), - //conv_test_int8_params("CPU", case_8), - //conv_test_int8_params("CPU", case_9), - //conv_test_int8_params("CPU", case_10), - //conv_test_int8_params("CPU", case_11), -}; - -INSTANTIATE_TEST_CASE_P( - TestConvolution, smoke_ConvolutionInt8Test, ::testing::ValuesIn(conv_int8_test_cases), getTestCaseName); \ No newline at end of file diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/detectionout_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/detectionout_tests.cpp deleted file mode 100644 index 3e718de1510..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/detectionout_tests.cpp +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" - -using namespace ::testing; -using namespace InferenceEngine; - -struct detectionout_test_params { - std::string device_name; - - size_t mb; - - struct { - size_t c; - } in1; - - struct { - size_t c; - } in2; - - struct { - size_t c; - size_t h; - size_t w; - } in3; - - struct { - size_t c; - size_t h; - size_t w; - } out; -}; - -class smoke_CPUDetectionOutOnlyTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - 1 - _IC1_ - - - - - - - 1 - _IC2_ - - - - - - - 1 - _IC3_ - _IH3_ - _IW3_ - - - - - - - - 1 - _IC1_ - - - 1 - _IC2_ - - - 1 - _IC3_ - _IH3_ - _IW3_ - - - - - 1 - _OC_ - _OH_ - _OW_ - - - - - - - - - - - -)V0G0N"; - - std::string getModel(detectionout_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IC1_", p.in1.c); - REPLACE_WITH_NUM(model, "_IC2_", p.in2.c); - - REPLACE_WITH_NUM(model, "_IC3_", p.in3.c); - REPLACE_WITH_NUM(model, "_IH3_", p.in3.h); - REPLACE_WITH_NUM(model, "_IW3_", p.in3.w); - - REPLACE_WITH_NUM(model, "_OC_", p.out.c); - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - - return model; - } - -protected: - virtual void SetUp() { - - try { - detectionout_test_params p = 
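For reference, the DetectionOutput primitive exercised by this test emits a fixed-size tensor in which every row describes one detection; to the best of my knowledge the seven values per row are [image_id, label, confidence, xmin, ymin, xmax, ymax], with unused rows commonly marked by image_id = -1. A small decoding sketch over a raw float view of such an output (hypothetical names, not part of the deleted test):

#include <cstddef>

struct Detection { float image_id, label, confidence, xmin, ymin, xmax, ymax; };

static void read_detections(const float* out, size_t num_rows) {
    for (size_t r = 0; r < num_rows; ++r) {
        const float* row = out + r * 7;
        if (row[0] < 0) break;   // image_id == -1 conventionally marks the end of valid detections
        Detection d{row[0], row[1], row[2], row[3], row[4], row[5], row[6]};
        (void)d;                 // post-process the detection here
    }
}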
::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, Blob::CPtr()); - network.setBatchSize(p.mb); - - InputsDataMap inputs = network.getInputsInfo(); - - DataPtr inputPtr1 = inputs["input1"]->getInputData(); - DataPtr inputPtr2 = inputs["input2"]->getInputData(); - DataPtr inputPtr3 = inputs["input3"]->getInputData(); - - InferenceEngine::Blob::Ptr input1 = InferenceEngine::make_shared_blob(inputPtr1->getTensorDesc()); - input1->allocate(); - - InferenceEngine::Blob::Ptr input2 = InferenceEngine::make_shared_blob(inputPtr2->getTensorDesc()); - input2->allocate(); - - InferenceEngine::Blob::Ptr input3 = InferenceEngine::make_shared_blob(inputPtr3->getTensorDesc()); - input3->allocate(); - - InferenceEngine::BlobMap inputBlobs; - inputBlobs["input1"] = input1; - inputBlobs["input2"] = input2; - inputBlobs["input3"] = input3; - - OutputsDataMap outputs = network.getOutputsInfo(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(outputs["detection_out"]->getTensorDesc()); - output->allocate(); - - InferenceEngine::BlobMap outputBlobs; - outputBlobs["detection_out"] = output; - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - inferRequest.SetInput(inputBlobs); - inferRequest.SetOutput(outputBlobs); - inferRequest.Infer(); - - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPUDetectionOutOnlyTest, TestsDetectionOut) {} - -INSTANTIATE_TEST_CASE_P( - TestsDetectionOut, smoke_CPUDetectionOutOnlyTest, - ::testing::Values( - detectionout_test_params{ "CPU", - 10, {147264}, {147264}, {2, 1, 147264}, {1, 200, 7} })); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/fullycon_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/fullycon_tests.cpp deleted file mode 100644 index 5730a3a3c35..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/fullycon_tests.cpp +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "ir_gen_helper.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - -struct fc_base_params { - struct { - size_t w; - size_t h; - size_t c; - } in; - - size_t out_c; -}; - -struct fc_test_params : fc_base_params { - std::string device_name; - - fc_test_params(std::string name, fc_base_params params) : - fc_base_params(params), device_name(name) {} -}; - -template -void ref_innerproduct(const TBlob &src, const data_t *weights, const size_t weightsSize, - TBlob &dst, fc_test_params prm) -{ - size_t IW = src.getTensorDesc().getDims()[3]; - size_t IH = src.getTensorDesc().getDims()[2]; - size_t IC = src.getTensorDesc().getDims()[1]; - - size_t OC = prm.out_c; - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights; - const data_t *bias_data = weights_data + IW*IH*IC*OC; - data_t *dst_data = dst.data(); - - IE_ASSERT( IW*IH*IC*OC + OC == weightsSize); - IE_ASSERT( OC == dst.getTensorDesc().getDims()[1]); - - for (size_t oc = 0; oc < OC; oc++) { - dst_data[oc] = bias_data[oc]; - for (size_t ic = 0; ic < IC; ic++) { - for (size_t kh = 0; kh < IH; kh++) { - for (size_t kw = 0; kw < IW; kw++) { - size_t iidx = ic * IH * IW + kh * 
IW + kw; - size_t widx = oc * IC * IH * IW - + ic * IH * IW + kh * IW + kw; - - dst_data[oc] += src_data[iidx] * weights_data[widx]; - } - } - } - } -} - -class smoke_FullyConnectedOnlyTest: public TestsCommon, - public WithParamInterface { - - std::string layers_t = R"V0G0N( - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _OC_ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - - std::string getModel(fc_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", 1); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_OC_", p.out_c); - - size_t w_data_size = (p.in.w * p.in.h * p.in.c * p.out_c )* sizeof(float); - size_t b_data_size = p.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - - model = IRTemplateGenerator::getIRTemplate("FullyConnected_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void SetUp() { - - try { - fc_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - TBlob *weights = new TBlob({Precision::U8, {(p.in.w * p.in.h * p.in.c * p.out_c + p.out_c) * sizeof(float)}, Layout::C}); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - TBlob::Ptr weights_ptr = TBlob::Ptr(weights); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, weights_ptr); - - SizeVector dims_src = {1, - p.in.c, - p.in.h, - p.in.w}; - Blob::Ptr src = make_shared_blob(TensorDesc({ Precision::FP32, dims_src, Layout::NCHW })); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - SizeVector dims_dst = {1, p.out_c}; - Blob::Ptr dst = make_shared_blob(TensorDesc({ Precision::FP32, dims_dst, Layout::NC })); - dst->allocate(); - - TBlob dst_ref({Precision::FP32, dims_dst, Layout::NC}); - dst_ref.allocate(); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.device_name); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - auto * srcPtr = dynamic_cast*>(src.get()); - ref_innerproduct(*srcPtr, weights->readOnly().as(), weights->size() / sizeof(float), dst_ref, p); - compare(*dst, dst_ref, 0.9f); - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -#define case_1 fc_base_params({{227, 227, 3}, 96}) -#define case_2 fc_base_params({{227, 227, 4}, 8}) - -TEST_P(smoke_FullyConnectedOnlyTest, TestsFullyConnected) {} - -std::string getTestCaseName(testing::TestParamInfo obj) { - return obj.param.device_name + - "_w" + std::to_string(obj.param.in.w) + - "_h" + std::to_string(obj.param.in.h) + - "_c" + std::to_string(obj.param.in.c) + - "_outc" + std::to_string(obj.param.out_c); -} - -fc_test_params fc_only_test_cases[] = { - fc_test_params("CPU", case_1), - fc_test_params("CPU", case_2), -}; - -INSTANTIATE_TEST_CASE_P( - TestsFullyConnected, smoke_FullyConnectedOnlyTest, ::testing::ValuesIn(fc_only_test_cases), getTestCaseName); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_batchnorm_tests.cpp 
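ref_innerproduct above reduces to a bias plus matrix-vector product over the flattened IC*IH*IW input, y[oc] = b[oc] + sum_i w[oc*len + i] * x[i]. A stand-alone sketch with the same row-major weight layout (names illustrative):

#include <cstddef>

// Fully connected layer on flat arrays; w is [out_len][in_len] row-major, b holds out_len biases.
static void fully_connected(const float* x, const float* w, const float* b,
                            float* y, size_t in_len, size_t out_len) {
    for (size_t oc = 0; oc < out_len; ++oc) {
        float acc = b[oc];
        for (size_t i = 0; i < in_len; ++i)
            acc += w[oc * in_len + i] * x[i];
        y[oc] = acc;
    }
}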
b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_batchnorm_tests.cpp deleted file mode 100644 index 88a58955c48..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_batchnorm_tests.cpp +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "ir_gen_helper.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - -struct batchnorm4D_test_params { - std::string device_name; - - struct { - size_t w; - size_t h; - size_t c; - } in; - - double epsilon; -}; - -template -void ref_batchnorm4D(const TBlob &src, const data_t *variance, const data_t *mean, - TBlob &dst, batchnorm4D_test_params prm) { - size_t IW = src.getTensorDesc().getDims()[3]; - size_t IH = src.getTensorDesc().getDims()[2]; - size_t IC = src.getTensorDesc().getDims()[1]; - size_t MB = src.getTensorDesc().getDims()[0]; - - const double eps = prm.epsilon; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (int c = 0; c < IC; ++c) { - data_t v_mean = mean[c]; - data_t v_variance = variance[c]; - data_t sqrt_variance = 0; - - sqrt_variance = 1. / sqrt(v_variance + eps); - - for (int n = 0; n < MB; ++n) - for (int h = 0; h < IH; ++h) - for (int w = 0; w < IW; ++w) { - size_t idx = n * IC * IH * IW - + c * IH * IW - + h * IW + w; - dst_data[idx] = (src_data[idx] - v_mean) * sqrt_variance; - } - } -} - -class smoke_CPUBatchNorn4DOnlyTest: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _OC_ - _OH_ - _OW_ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - std::string getModel(batchnorm4D_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", 1); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_EPSILON_", p.epsilon); - - REPLACE_WITH_NUM(model, "_OW_", p.in.w); - REPLACE_WITH_NUM(model, "_OH_", p.in.h); - REPLACE_WITH_NUM(model, "_OC_", p.in.c); - - size_t w_data_size = p.in.c * sizeof(float); - size_t b_data_size = p.in.c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - - model = IRTemplateGenerator::getIRTemplate("BatchNorm4D_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void SetUp() { - try { - batchnorm4D_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - TBlob *weights = new TBlob(TensorDesc(Precision::U8, {p.in.c * 2 * sizeof(float)}, C)); - weights->allocate(); - fill_data(weights->buffer(), weights->size() / sizeof(float)); - float * data = weights->buffer(); - for (size_t i = 0; i < weights->size() / sizeof(float); i++) { - if (data[i] < 0) { - data[i] *= -1; - } - } - - TBlob::Ptr weights_ptr = TBlob::Ptr(weights); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, weights_ptr); - - SizeVector dims_src = {p.in.w, - p.in.h, - p.in.c, - 1}; // 1 is a batch size - Blob::Ptr src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - Blob::Ptr dst = 
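The batch-norm reference above normalizes each channel as y = (x - mean[c]) / sqrt(var[c] + eps), with no scale or shift folded in; in isolation that is:

#include <cmath>

// Per-channel batch normalization as computed by ref_batchnorm4D (no gamma/beta).
static inline float batchnorm(float x, float mean, float variance, float eps) {
    return (x - mean) / std::sqrt(variance + eps);
}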
make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - dst->allocate(); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - TBlob dst_ref(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - dst_ref.allocate(); - - auto * srcPtr = dynamic_cast*>(src.get()); - ref_batchnorm4D(*srcPtr, (const float*) weights->buffer(), ((const float*) weights->buffer() + p.in.c), dst_ref, p); - - compare(*dst, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPUBatchNorn4DOnlyTest, TestsBatchNorm4D) {} - -INSTANTIATE_TEST_CASE_P( - TestBatchNorm4D, smoke_CPUBatchNorn4DOnlyTest, - ::testing::Values( - batchnorm4D_test_params{ "CPU", - {256, 128, 32}, 1e-6})); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_deconv_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_deconv_tests.cpp deleted file mode 100644 index 1b247042ddf..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_deconv_tests.cpp +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include - -#include "tests_common.hpp" -#include "../common_single_layer_tests/deconv_ref.hpp" -#include "ir_gen_helper.hpp" -#include "common_test_utils/common_layers_params.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - -struct deconv_test_params { - std::string device_name; - - struct { - size_t w; - size_t h; - size_t c; - } in; - - size_t krn_w; - size_t krn_h; - size_t str_w; - size_t str_h; - size_t pad_w; - size_t pad_h; - - size_t out_c; - - bool with_bias; -}; - -template -void ref_deconv(const Blob::Ptr &src, const Blob::Ptr &weights, const Blob::Ptr &bias, - Blob::Ptr &dst_ref, deconv_test_params p) { - const float *weights_data = (const float *) weights->buffer(); - size_t bias_size = p.out_c; - size_t weights_size = weights->size() / sizeof(float) - bias_size; - const float *bias_data = p.with_bias ? 
(const float *) bias->buffer() : nullptr; - CommonTestUtils::conv_common_params params; - params.kernel.insert(X_AXIS, p.krn_w); - params.kernel.insert(Y_AXIS, p.krn_h); - params.stride.insert(X_AXIS, p.str_w); - params.stride.insert(Y_AXIS, p.str_h); - params.pads_begin.insert(X_AXIS, p.pad_w); - params.pads_begin.insert(Y_AXIS, p.pad_h); - params.out_c = p.out_c; - ref_deconv_common({ src }, *dst_ref.get(), weights_data, weights_size, bias_data, bias_size, params); -} - -class smoke_CPUDeconvolutionOnlyTest : public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _OC_ - _OH_ - _OW_ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - - std::string getModel(deconv_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", 1); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - - REPLACE_WITH_NUM(model, "_KW_", p.krn_w); - REPLACE_WITH_NUM(model, "_KH_", p.krn_h); - REPLACE_WITH_NUM(model, "_SW_", p.str_w); - REPLACE_WITH_NUM(model, "_SH_", p.str_h); - REPLACE_WITH_NUM(model, "_PW_", p.pad_w); - REPLACE_WITH_NUM(model, "_PH_", p.pad_h); - - REPLACE_WITH_NUM(model, "_OC_", p.out_c); - REPLACE_WITH_NUM(model, "_OH_", p.str_h * (p.in.h - 1) + p.krn_h - 2 * p.pad_h); - REPLACE_WITH_NUM(model, "_OW_", p.str_w * (p.in.w - 1) + p.krn_w - 2 * p.pad_w); - - if (!p.with_bias) REMOVE_LINE(model, ""); - - size_t w_data_size = (p.krn_w * p.krn_h * p.out_c * p.in.c) * sizeof(float); - size_t b_data_size = p.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_OFF2_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - - model = IRTemplateGenerator::getIRTemplate("Deconvolution_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void SetUp() { - try { - deconv_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - std::vector blob_to_model; - Blob::Ptr weights = make_shared_blob(TensorDesc(Precision::FP32, - {p.krn_w * p.krn_h * p.out_c * p.in.c}, C)); - weights->allocate(); - fill_data(weights->buffer().as(), weights->size()); - blob_to_model.push_back(weights); - - Blob::Ptr bias = nullptr; - if (p.with_bias) { - bias = make_shared_blob(TensorDesc(Precision::FP32, - {p.krn_w * p.krn_h * p.out_c * p.in.c}, C)); - bias->allocate(); - fill_data(bias->buffer().as(), bias->size()); - blob_to_model.push_back(bias); - } - - size_t total_size_in_bytes = 0; - for (Blob::Ptr blb : blob_to_model) total_size_in_bytes += blb->byteSize(); - - TBlob::Ptr model_blob = make_shared_blob(TensorDesc(Precision::U8, { total_size_in_bytes }, C)); - model_blob->allocate(); - uint8_t *model_blob_ptr = model_blob->buffer().as(); - for (Blob::Ptr blb : blob_to_model) { - memcpy(model_blob_ptr, blb->buffer().as(), blb->byteSize()); - model_blob_ptr += blb->byteSize(); - } - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, model_blob); - - SizeVector dims_src = {p.in.w, p.in.h, p.in.c, 1}; // 1 is a batch size - - Blob::Ptr src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - size_t OW = p.str_w * (p.in.w - 1) + p.krn_w - 2 * p.pad_w; - size_t OH = p.str_h * (p.in.h - 1) + p.krn_h - 2 * p.pad_h; - - SizeVector dims_dst = {OW, OH, p.out_c, 1}; - 
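The _OH_/_OW_ substitution and the OW/OH computed in SetUp above follow the usual transposed-convolution relation, out = S*(in - 1) + K - 2P. A sketch with worked numbers for one of the instantiations below:

#include <cstddef>

// Transposed-convolution (deconvolution) output extent, matching the substitution above.
static size_t deconv_out_dim(size_t in, size_t kernel, size_t stride, size_t pad) {
    return stride * (in - 1) + kernel - 2 * pad;
}

// e.g. a 3x3 input, 4x3 kernel (w x h), strides {1, 2}, no padding:
//   OW = 1 * (3 - 1) + 4 = 6,  OH = 2 * (3 - 1) + 3 = 7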
- Blob::Ptr dst = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst->allocate(); - fill_data(dst->buffer().as(), dst->size()); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - Blob::Ptr dst_ref = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst_ref->allocate(); - - ref_deconv(src, weights, bias, dst_ref, p); - - compare(*dst.get(), *dst_ref.get()); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPUDeconvolutionOnlyTest, TestsDeconvolution) {} - -INSTANTIATE_TEST_CASE_P( - TestDeconvolution, smoke_CPUDeconvolutionOnlyTest, - ::testing::Values( - deconv_test_params{"CPU", - {3, 3, 3}, - 3, 3, 1, 1, 0, 0, 2, true}, - deconv_test_params{"CPU", - {3, 3, 3}, - 4, 3, 1, 1, 0, 0, 2, true}, - deconv_test_params{"CPU", - {3, 3, 3}, - 4, 3, 1, 2, 0, 0, 2, true}, - deconv_test_params{"CPU", - {4, 4, 3}, - 3, 3, 1, 2, 0, 0, 2, true}, // jit impl should work - deconv_test_params{"CPU", - {4, 4, 3}, - 3, 3, 1, 2, 0, 0, 2, false}, // jit impl should work - deconv_test_params{"CPU", - {3, 3, 3}, - 3, 3, 1, 1, 0, 0, 2, false}, - deconv_test_params{"CPU", - {3, 3, 3}, - 4, 3, 1, 1, 0, 0, 2, false}, - deconv_test_params{"CPU", - {3, 3, 3}, - 4, 3, 1, 2, 0, 0, 2, false})); - - -/*** TBD ***/ - - diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_logistic_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_logistic_tests.cpp deleted file mode 100644 index 99363c68877..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_logistic_tests.cpp +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "ir_gen_helper.hpp" - -#include - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - -struct logistic_test_params { - std::string device_name; - - struct { - size_t w; - size_t h; - size_t c; - } in; -}; - -template -T logistic_fwd(T s) { - T v = ::expf((float)(s)); - return v / (v + 1); -} - -template -void ref_logistic(const TBlob &src, TBlob &dst, logistic_test_params prm) { - data_t *dst_data = dst.data(); - - const data_t *src_data = src.readOnly(); - - for (int i = 0; i < src.size(); i++) { - dst_data[i] = logistic_fwd(src_data[i]); - } -} - -class smoke_CPULogisticOnlyTest : public TestsCommon, - public WithParamInterface { - - std::string layers_t = R"V0G0N( - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - - std::string getModel(logistic_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", 1); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - model = IRTemplateGenerator::getIRTemplate("Logistic_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - 
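logistic_fwd above computes exp(x)/(exp(x) + 1), which is algebraically the sigmoid 1/(1 + exp(-x)); the second form avoids the overflow of expf() for large positive inputs that makes the first form produce NaN. A minimal equivalent:

#include <cmath>

// Numerically safer sigmoid, equal to logistic_fwd wherever expf() does not overflow.
static inline float sigmoid(float x) {
    return 1.0f / (1.0f + std::exp(-x));
}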
return model; - } - - protected: - virtual void SetUp() { - - try { - logistic_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network; - ASSERT_NO_THROW(network = ie.ReadNetwork(model, Blob::CPtr())); - - SizeVector dims_src = {p.in.w, - p.in.h, - p.in.c, - 1}; - - Blob::Ptr src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - SizeVector dims_dst = dims_src; - - Blob::Ptr dst = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst->allocate(); - - TBlob dst_ref(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst_ref.allocate(); - - auto * srcPtr = dynamic_cast*>(src.get()); - ref_logistic(*srcPtr, dst_ref, p); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(network.getOutputsInfo().begin()->first, dst); - inferRequest.Infer(); - - compare(*dst, dst_ref); - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPULogisticOnlyTest, TestsLogistic) {} - -INSTANTIATE_TEST_CASE_P( - TestLogistic, smoke_CPULogisticOnlyTest, - ::testing::Values( - logistic_test_params{"CPU", - {13, 13, 8}} - ) -); - -/*** TBD ***/ diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_power_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_power_tests.cpp deleted file mode 100644 index 8437a12f1d2..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_power_tests.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "ir_gen_helper.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - -struct power_test_params { - std::string device_name; - - struct { - size_t w; - size_t h; - size_t c; - } in; - - float power; - float scale; - float shift; -}; - -template -void ref_power(const TBlob &src, TBlob &dst, power_test_params prm) { - - data_t *dst_data = dst.data(); - const data_t *src_data = src.readOnly(); - - const double scale = prm.scale; - const double power = prm.power; - const double shift = prm.shift; - - for(int i = 0; i < src.size(); i++) { - dst_data[i] = (float)std::pow(shift + src_data[i] * scale, power); - } -} - -class smoke_CPUPowerOnlyTest: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - - std::string getModel(power_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", 1); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_POWER_", p.power); - REPLACE_WITH_NUM(model, "_SCALE_", p.scale); - REPLACE_WITH_NUM(model, "_SHIFT_", p.shift); - - model = IRTemplateGenerator::getIRTemplate("Power_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - - 
return model; - } - -protected: - virtual void SetUp() { - - try { - power_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, Blob::CPtr()); - - SizeVector dims_src = {p.in.w, - p.in.h, - p.in.c, - 1}; - - Blob::Ptr src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - SizeVector dims_dst = dims_src; - - Blob::Ptr dst = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst->allocate(); - - TBlob dst_ref(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst_ref.allocate(); - - auto * srcPtr = dynamic_cast*>(src.get()); - ref_power(*srcPtr, dst_ref, p); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - compare(*dst, dst_ref); - - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPUPowerOnlyTest, TestsPower) {} - -INSTANTIATE_TEST_CASE_P( - TestPower, smoke_CPUPowerOnlyTest, - ::testing::Values( - power_test_params{ "CPU", - {13, 13, 3}, 1, 2, 0.5f }, - power_test_params{ "CPU", - {23, 23, 1}, 3, 8, 2 }, - power_test_params{ "CPU", - {23, 23, 8}, 8, 2, 1 })); - -/*** TBD ***/ - - diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_roipooling_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_roipooling_tests.cpp deleted file mode 100644 index 12f39d4ed47..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_roipooling_tests.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include -#include "ir_gen_helper.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - -struct roipooling_test_params { - std::string device_name; - - struct { - size_t w; - size_t h; - size_t c; - } in; - - size_t pooled_h; - size_t pooled_w; - float spatial_scale; -}; - -template -void ref_roipool(const TBlob &src, TBlob &dst, roipooling_test_params prm) -{ -} - -class MKLDNNROIPoolingOnlyTest: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - _IN_ - _IC_ - _IW_ - _IH_ - - - 300 - 5 - - - - - 300 - 256 - 6 - 6 - - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - - std::string getModel(roipooling_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", 1); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - - REPLACE_WITH_NUM(model, "_POOLED_H_", p.pooled_h); - REPLACE_WITH_NUM(model, "_POOLED_W_", p.pooled_w); - REPLACE_WITH_NUM(model, "_SPATIAL_SCALE_", p.spatial_scale); - - model = IRTemplateGenerator::getIRTemplate("ROIPooling_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - - return model; - } - -protected: - 
virtual void SetUp() { - - try { - roipooling_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core ie; - ASSERT_NO_THROW(ie.ReadNetwork(model, Blob::CPtr())); - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNROIPoolingOnlyTest, nightly_TestsROIPooling) {} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_scaleshift_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_scaleshift_tests.cpp deleted file mode 100644 index bc1915393ff..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_scaleshift_tests.cpp +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "ir_gen_helper.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - -struct scaleshift_test_params { - std::string device_name; - - struct { - size_t w; - size_t h; - size_t c; - } in; - - int broadcast; -}; - -template -void ref_scaleshift(const TBlob &src, const data_t *weights, const size_t weightsSize, - TBlob &dst, scaleshift_test_params prm) { - - size_t IW = src.getTensorDesc().getDims()[3]; - size_t IH = src.getTensorDesc().getDims()[2]; - size_t IC = src.getTensorDesc().getDims()[1]; - size_t MB = src.getTensorDesc().getDims()[0]; - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights; - const data_t *bias_data = weights_data + IC; - data_t *dst_data = dst.data(); - - for(int mb = 0; mb < MB; mb++) { - for(int c = 0; c < IC; c++) { - for(int h = 0; h < IH; h++) { - for(int w = 0; w < IW; w++) { - int idx = mb * IC * IH * IW - + c * IH * IW - + h * IW + w; - - int widx = c; - int bidx = c; - - dst_data[idx] = src_data[idx] * weights_data[widx] + bias_data[bidx]; - } - } - } - } -} - -class smoke_CPUScaleShiftOnlyTest: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - - std::string getModel(scaleshift_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", 1); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_BROADCAST_", p.broadcast); - - size_t w_data_size = p.in.c * sizeof(float); - size_t b_data_size = p.in.c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - - model = IRTemplateGenerator::getIRTemplate("ScaleShift_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void SetUp() { - - try { - scaleshift_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - TBlob *weights = new TBlob(TensorDesc(Precision::U8, { p.in.c * 2 * sizeof(float) }, C)); - weights->allocate(); - fill_data( weights->data().as(), weights->size() / sizeof(float)); - - TBlob::Ptr weights_ptr = TBlob::Ptr(weights); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, weights_ptr); - - SizeVector dims_src = {p.in.w, - p.in.h, - p.in.c, - 1}; // 1 is a batch size - Blob::Ptr 
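ref_scaleshift above applies an independent affine transform per channel, y[n][c][h][w] = w[c] * x[n][c][h][w] + b[c], with every spatial position of a channel sharing the same scale and shift. A stripped-down sketch on flat NCHW data (names illustrative):

#include <cstddef>

// Per-channel scale/shift over an NCHW tensor; spatial = H * W.
static void scale_shift(const float* x, const float* scale, const float* shift,
                        float* y, size_t n, size_t c, size_t spatial) {
    for (size_t b = 0; b < n; ++b)
        for (size_t ch = 0; ch < c; ++ch)
            for (size_t i = 0; i < spatial; ++i) {
                size_t idx = (b * c + ch) * spatial + i;
                y[idx] = x[idx] * scale[ch] + shift[ch];
            }
}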
src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - Blob::Ptr dst = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - dst->allocate(); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - - TBlob dst_ref(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - dst_ref.allocate(); - - auto * srcPtr = dynamic_cast*>(src.get()); - ref_scaleshift(*srcPtr, weights->readOnly().as(), weights->size() / sizeof(float), dst_ref, p); - - compare(*dst, dst_ref); - - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPUScaleShiftOnlyTest, TestsScaleShift) {} - -INSTANTIATE_TEST_CASE_P( - TestScaleShift, smoke_CPUScaleShiftOnlyTest, - ::testing::Values( - scaleshift_test_params{ "CPU", - {256, 128, 32}, 0})); - diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_simplernms_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_simplernms_tests.cpp deleted file mode 100644 index 1ddfe5a80e7..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/mkldnn_simplernms_tests.cpp +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "ir_gen_helper.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - -struct simplernms_test_params { - std::string device_name; - - struct { - size_t w; - size_t h; - size_t c; - } in; - - float cls_threshold; - size_t max_num_proposals; - float iou_threshold; - size_t min_bbox_size; - size_t feat_stride; - size_t pre_nms_topn; - size_t post_nms_topn; - float scale1; - float scale2; - float scale3; -}; - -template -void ref_simplernms(const TBlob &src, TBlob &dst, simplernms_test_params prm) -{ -} - -class MKLDNNSimplerNMSOnlyTest: public TestsCommon, - public WithParamInterface { - - std::string layers_t = R"V0G0N( - - - - - - - - - - - - - - 18 - 39 - 64 - - - 18 - 39 - 64 - - - - - 300 - 5 - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - - - -)V0G0N"; - - std::string getModel(simplernms_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", p.in.w); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - - REPLACE_WITH_NUM(model, "_CLS_THR_", p.cls_threshold); - REPLACE_WITH_NUM(model, "_MAX_NUM_", p.max_num_proposals); - REPLACE_WITH_NUM(model, "_IOU_THR_", p.iou_threshold); - REPLACE_WITH_NUM(model, "_MIN_BB_SIZE_", p.min_bbox_size); - REPLACE_WITH_NUM(model, "_FEAT_STRIDE_", p.feat_stride); - REPLACE_WITH_NUM(model, "_PRE_NMS_TOPN_", p.pre_nms_topn); - REPLACE_WITH_NUM(model, "_POST_NMS_TOPN_", p.post_nms_topn); - REPLACE_WITH_NUM(model, "_SCALE1_", p.scale1); - REPLACE_WITH_NUM(model, "_SCALE2_", p.scale2); - REPLACE_WITH_NUM(model, "_SCALE3_", 
p.scale3); - - model = IRTemplateGenerator::getIRTemplate("SimplerNMS_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void SetUp() { - - try { - simplernms_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, Blob::CPtr()); - - SizeVector dims_src = {p.in.w, - p.in.h, - p.in.c, - 1}; - - Blob::Ptr src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - SizeVector dims_dst = {300, 5, 1}; - - Blob::Ptr dst = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst->allocate(); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNSimplerNMSOnlyTest, nightly_TestSimplerNMS) {} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/norm_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/norm_tests.cpp deleted file mode 100644 index d950ce5492e..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/norm_tests.cpp +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "ir_gen_helper.hpp" - -using namespace ::testing; -using namespace InferenceEngine; -using namespace single_layer_tests; - - -struct norm_base_params { - struct { - size_t w; - size_t h; - size_t c; - } in; - - size_t local_size; - float alpha; - float beta; - size_t k; - -}; - -struct norm_test_params : norm_base_params { - std::string device_name; - - norm_test_params(std::string name, norm_base_params params) : - norm_base_params(params), device_name(name) {} -}; - - -template -void ref_norm(const TBlob &src, TBlob &dst, norm_test_params prm) -{ - size_t IW = prm.in.w; - size_t IH = prm.in.h; - size_t IC = prm.in.c; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (uint32_t c = 0; c < IC; c++) { - for (uint32_t h = 0; h < IH; h++) { - for (uint32_t w = 0; w < IW; w++) { - uint32_t oidx = c * IH * IW - + h * IW + w; - - uint32_t sz = prm.local_size; - int32_t c_start = c - sz / 2; - int32_t c_end = c_start + sz; - if (c_start < 0) c_start = 0; - if (c_end > (int32_t)IC) c_end = IC; - data_t sum = 0.0; - for (int32_t c1 = c_start; c1 < c_end; c1++) { - uint32_t idx = c1 * IH * IW + h * IW + w; - data_t s = src_data[idx]; - - sum += s * s; - } - - data_t norm_coef = powf(1. 
+ prm.alpha * sum / sz, -prm.beta); - dst_data[oidx] = norm_coef * src_data[oidx]; - } - } - } -} - -class smoke_NormOnlyTest: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - - std::string getModel(norm_test_params p) { - std::string model = layers_t; - - REPLACE_WITH_NUM(model, "_IN_", 1); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - - REPLACE_WITH_NUM(model, "_LS_", p.local_size); - REPLACE_WITH_NUM(model, "_A__", p.alpha); - REPLACE_WITH_NUM(model, "_B__", p.beta); - REPLACE_WITH_NUM(model, "_K__", p.k); - - model = IRTemplateGenerator::getIRTemplate("FullyConnected_Only", {1lu, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void SetUp() { - - try { - norm_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, Blob::CPtr()); - - SizeVector dims_src = {1, - p.in.c, - p.in.h, - p.in.w}; - Blob::Ptr src = make_shared_blob(TensorDesc({ Precision::FP32, dims_src, Layout::NCHW })); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - SizeVector dims_dst = dims_src; - Blob::Ptr dst = make_shared_blob(TensorDesc({ Precision::FP32, dims_dst, Layout::NCHW })); - dst->allocate(); - - TBlob dst_ref({Precision::FP32, dims_dst, Layout::NCHW}); - dst_ref.allocate(); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.device_name); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - auto * srcPtr = dynamic_cast*>(src.get()); - ref_norm(*srcPtr, dst_ref, p); - compare(*dst, dst_ref); - - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -#define case_1 norm_base_params({{228, 228, 3}, 5, 0.0001f, 0.75f, 1}) - -TEST_P(smoke_NormOnlyTest, TestsNorm) {} - -std::string getTestCaseName(testing::TestParamInfo obj) { - return obj.param.device_name + - "_w" + std::to_string(obj.param.in.w) + - "_h" + std::to_string(obj.param.in.h) + - "_c" + std::to_string(obj.param.in.c); -} - -norm_test_params norm_only_test_cases[] = { - norm_test_params("CPU", case_1), -}; - -INSTANTIATE_TEST_CASE_P( - TestsNorm, smoke_NormOnlyTest, ::testing::ValuesIn(norm_only_test_cases), getTestCaseName); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/pooling_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/pooling_tests.cpp deleted file mode 100644 index 8db87e9c59b..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/pooling_tests.cpp +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" -#include "ie_core.hpp" -#include "../common_single_layer_tests/pool_ref.hpp" -#include "common_test_utils/common_layers_params.hpp" - -using namespace ::testing; -using namespace InferenceEngine; - -struct pooling_base_params { - struct { size_t n, c, 
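ref_norm above is across-channel LRN: y = x * (k + alpha * sum(x^2) / n) ^ (-beta), where the sum runs over a window of n = local_size channels centred on c and clamped to [0, C). The deleted reference folds the additive constant to 1, which coincides with the k = 1 used by case_1. A per-pixel sketch over the channel axis:

#include <cmath>
#include <cstddef>

// Across-channel LRN for one spatial position; the additive constant is taken as 1,
// exactly as in the deleted reference.
static void lrn_across_channels(const float* x, float* y, size_t channels,
                                size_t local_size, float alpha, float beta) {
    for (size_t c = 0; c < channels; ++c) {
        long start = static_cast<long>(c) - static_cast<long>(local_size) / 2;
        long end = start + static_cast<long>(local_size);
        if (start < 0) start = 0;
        if (end > static_cast<long>(channels)) end = static_cast<long>(channels);

        float sum = 0.f;
        for (long c1 = start; c1 < end; ++c1)
            sum += x[c1] * x[c1];
        y[c] = x[c] * std::pow(1.f + alpha * sum / local_size, -beta);
    }
}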
h, w; } in; - struct { size_t h, w; } out; - - size_t krn_h; - size_t krn_w; - size_t str_h; - size_t str_w; - size_t pad_h; - size_t pad_w; - - bool avg; - bool exclude_pad; -}; - -struct pooling_test_params : pooling_base_params { - std::string device_name; - - pooling_test_params(std::string name, pooling_base_params params) : - pooling_base_params(params), device_name(name) {} -}; - -template -void ref_pool(const Blob::Ptr &src, Blob::Ptr &dst, pooling_test_params p) -{ - CommonTestUtils::pool_common_params params; - params.kernel.insert(X_AXIS, p.krn_w); - params.kernel.insert(Y_AXIS, p.krn_h); - params.stride.insert(X_AXIS, p.str_w); - params.stride.insert(Y_AXIS, p.str_h); - params.pads_begin.insert(X_AXIS, p.pad_w); - params.pads_begin.insert(Y_AXIS, p.pad_h); - params.exclude_pad = p.exclude_pad; - params.avg = p.avg; - ref_pool_common({ src }, *dst.get(), params); -} - -class smoke_CPU_PoolingOnlyTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _OH_ - _OW_ - - - - - - - - -)V0G0N"; - - std::string getModel(pooling_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - - REPLACE_WITH_NUM(model, "_KH_", p.krn_h); - REPLACE_WITH_NUM(model, "_KW_", p.krn_w); - REPLACE_WITH_NUM(model, "_SH_", p.str_h); - REPLACE_WITH_NUM(model, "_SW_", p.str_w); - REPLACE_WITH_NUM(model, "_PH_", p.pad_h); - REPLACE_WITH_NUM(model, "_PW_", p.pad_w); - - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - - REPLACE_WITH_STR(model, "_ALG_", p.avg ? "avg":"max"); - REPLACE_WITH_STR(model, "_EXCL_PAD_", p.exclude_pad ? 
"true":"false"); - - return model; - } - -protected: - virtual void SetUp() { - - try { - pooling_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, Blob::CPtr()); - - SizeVector dims_src = {p.in.w, p.in.h, p.in.c, p.in.n}; - Blob::Ptr src = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_src.rbegin(), dims_src.rend()), NCHW)); - src->allocate(); - fill_data(src->buffer().as(), src->size()); - - SizeVector dims_dst = {p.out.w, p.out.h, p.in.c, p.in.n}; - Blob::Ptr dst = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst->allocate(); - - Blob::Ptr dst_ref = make_shared_blob(TensorDesc(Precision::FP32, SizeVector(dims_dst.rbegin(), dims_dst.rend()), NCHW)); - dst_ref->allocate(); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.device_name); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - OutputsDataMap outInfo; - outInfo = network.getOutputsInfo(); - ASSERT_EQ(outInfo.size(), 1); - ASSERT_NE(outInfo.begin()->second, nullptr); - inferRequest.SetBlob(network.getInputsInfo().begin()->first, src); - inferRequest.SetBlob(outInfo.begin()->first, dst); - inferRequest.Infer(); - - ref_pool(src, dst_ref, p); - compare(*dst.get(), *dst_ref.get()); - - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -#define case_0 pooling_base_params({{1, 3, 228, 228}, {114, 114}, 2, 2, 2, 2, 0, 0}) -#define case_1 pooling_base_params({{1, 3, 228, 228}, {113, 114}, 4, 2, 2, 2, 0, 0}) -#define case_2 pooling_base_params({{1, 3, 228, 228}, {113, 227}, 4, 2, 2, 1, 0, 0}) -#define case_3 pooling_base_params({{1, 3, 224, 224}, {224, 224}, 3, 3, 1, 1, 1, 1, false, false}) -#define case_4 pooling_base_params({{1, 3, 224, 224}, {224, 224}, 3, 3, 1, 1, 1, 1, true, false}) -#define case_5 pooling_base_params({{1, 3, 224, 224}, {224, 224}, 3, 3, 1, 1, 1, 1, true, true}) - -#define case_6 pooling_base_params({{1, 3, 224, 224}, {112, 112}, 3, 3, 2, 2, 1, 1, false, false}) -#define case_7 pooling_base_params({{1, 3, 224, 224}, {112, 112}, 3, 3, 2, 2, 1, 1, true, false}) -#define case_8 pooling_base_params({{1, 3, 224, 224}, {112, 112}, 3, 3, 2, 2, 1, 1, true, true}) - -#define case_9 pooling_base_params({{1, 3, 224, 224}, {113, 113}, 3, 3, 2, 2, 1, 1, false, false}) -#define case_10 pooling_base_params({{1, 3, 224, 224}, {113, 113}, 3, 3, 2, 2, 1, 1, true, false}) -#define case_11 pooling_base_params({{1, 3, 224, 224}, {113, 113}, 3, 3, 2, 2, 1, 1, true, true}) - - -TEST_P(smoke_CPU_PoolingOnlyTest, TestsPooling) {} - -std::string getTestCaseName(testing::TestParamInfo obj) { - return obj.param.device_name + - "_w" + std::to_string(obj.param.in.w) + - "_h" + std::to_string(obj.param.in.h) + - "_c" + std::to_string(obj.param.in.c) + - "_krnw" + std::to_string(obj.param.krn_w) + - "_krnh" + std::to_string(obj.param.krn_h) + - "_strw" + std::to_string(obj.param.str_w) + - "_strh" + std::to_string(obj.param.str_h); -} - -pooling_test_params pooling_only_test_cases[] = { - pooling_test_params("CPU", case_0), - pooling_test_params("CPU", case_1), - pooling_test_params("CPU", case_2), - pooling_test_params("CPU", case_3), - pooling_test_params("CPU", case_4), - pooling_test_params("CPU", case_5), - pooling_test_params("CPU", case_6), - pooling_test_params("CPU", case_7), - pooling_test_params("CPU", case_8), - pooling_test_params("CPU", case_9), -// pooling_test_params("CPU", case_10), - pooling_test_params("CPU", case_11), 
-}; - -INSTANTIATE_TEST_CASE_P( - TestsPooling, smoke_CPU_PoolingOnlyTest, ::testing::ValuesIn(pooling_only_test_cases)); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/priorbox_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/priorbox_tests.cpp deleted file mode 100644 index 44a6a976db9..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/priorbox_tests.cpp +++ /dev/null @@ -1,369 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" - -using namespace ::testing; -using namespace InferenceEngine; - -struct priorbox_test_params { - std::string device_name; - - size_t mb; - - struct { - size_t c; - size_t h; - size_t w; - } in1; - - struct { - size_t c; - size_t h; - size_t w; - } in2; - - struct { - size_t c; - size_t h; - size_t w; - } out; - - int offset; - int stride; - int min_size; - int max_size; - bool flip; - bool clip; -}; - -class smoke_CPUPriorBoxOnlyTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - 1 - _IC1_ - _IH1_ - _IW1_ - - - - - - - 1 - _IC2_ - _IH2_ - _IW2_ - - - - - - - - 1 - _IC1_ - _IH1_ - _IW1_ - - - 1 - _IC2_ - _IH2_ - _IW2_ - - - - - 1 - _OC_ - _OH_ - _OW_ - - - - - - - - - - -)V0G0N"; - - std::string getModel(priorbox_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW1_", p.in1.w); - REPLACE_WITH_NUM(model, "_IH1_", p.in1.h); - REPLACE_WITH_NUM(model, "_IC1_", p.in1.c); - - REPLACE_WITH_NUM(model, "_IW2_", p.in2.w); - REPLACE_WITH_NUM(model, "_IH2_", p.in2.h); - REPLACE_WITH_NUM(model, "_IC2_", p.in2.c); - - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OC_", p.out.c); - - return model; - } - -protected: - virtual void SetUp() { - - try { - priorbox_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, Blob::CPtr()); - network.setBatchSize(p.mb); - - InputsDataMap inputs = network.getInputsInfo(); - - DataPtr inputPtr1 = inputs["input1"]->getInputData(); - DataPtr inputPtr2 = inputs["input2"]->getInputData(); - - InferenceEngine::Blob::Ptr input1 = InferenceEngine::make_shared_blob(inputPtr1->getTensorDesc()); - input1->allocate(); - - InferenceEngine::Blob::Ptr input2 = InferenceEngine::make_shared_blob(inputPtr2->getTensorDesc()); - input2->allocate(); - - InferenceEngine::BlobMap inputBlobs; - inputBlobs["input1"] = input1; - inputBlobs["input2"] = input2; - - OutputsDataMap outputs = network.getOutputsInfo(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(outputs["prior"]->getTensorDesc()); - output->allocate(); - - InferenceEngine::BlobMap outputBlobs; - outputBlobs["prior"] = output; - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - inferRequest.SetInput(inputBlobs); - inferRequest.SetOutput(outputBlobs); - inferRequest.Infer(); - - // Check results - - const TBlob::Ptr outputArray = std::dynamic_pointer_cast>(output); - float* dst_ptr = outputArray->data(); - - const float eps = 1e-6; - - // pick a few generated priors and compare against the expected number. 
- // first prior - EXPECT_NEAR(dst_ptr[0], 0.03, eps); - EXPECT_NEAR(dst_ptr[1], 0.03, eps); - EXPECT_NEAR(dst_ptr[2], 0.07, eps); - EXPECT_NEAR(dst_ptr[3], 0.07, eps); - // second prior - EXPECT_NEAR(dst_ptr[4], 0.02, eps); - EXPECT_NEAR(dst_ptr[5], 0.02, eps); - EXPECT_NEAR(dst_ptr[6], 0.08, eps); - EXPECT_NEAR(dst_ptr[7], 0.08, eps); - // prior in the 5-th row and 5-th col - EXPECT_NEAR(dst_ptr[4*10*2*4+4*2*4], 0.43, eps); - EXPECT_NEAR(dst_ptr[4*10*2*4+4*2*4+1], 0.43, eps); - EXPECT_NEAR(dst_ptr[4*10*2*4+4*2*4+2], 0.47, eps); - EXPECT_NEAR(dst_ptr[4*10*2*4+4*2*4+3], 0.47, eps); - - // check variance - dst_ptr += p.out.h * p.out.w; - for (int d = 0; d < p.out.h * p.out.w; ++d) { - EXPECT_NEAR(dst_ptr[d], 0.1, eps); - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPUPriorBoxOnlyTest, TestsPriorBox) {} - -INSTANTIATE_TEST_CASE_P( - TestsPriorBox, smoke_CPUPriorBoxOnlyTest, - ::testing::Values( - priorbox_test_params{ "CPU", - 10, {10, 10, 10}, {3, 100, 100}, {2, 1, 800}, 0, 0, 4, 9, true, true })); - - -class smoke_CPUPriorBoxDensityTest : public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - 1 - _IC1_ - _IH1_ - _IW1_ - - - - - - - 1 - _IC2_ - _IH2_ - _IW2_ - - - - - - - - 1 - _IC1_ - _IH1_ - _IW1_ - - - 1 - _IC2_ - _IH2_ - _IW2_ - - - - - 1 - _OC_ - _OH_ - _OW_ - - - - - - - - - - -)V0G0N"; - - std::string getModel(priorbox_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW1_", p.in1.w); - REPLACE_WITH_NUM(model, "_IH1_", p.in1.h); - REPLACE_WITH_NUM(model, "_IC1_", p.in1.c); - - REPLACE_WITH_NUM(model, "_IW2_", p.in2.w); - REPLACE_WITH_NUM(model, "_IH2_", p.in2.h); - REPLACE_WITH_NUM(model, "_IC2_", p.in2.c); - - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OC_", p.out.c); - - return model; - } - -protected: - virtual void SetUp() { - - try { - priorbox_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork network = ie.ReadNetwork(model, Blob::CPtr()); - network.setBatchSize(p.mb); - - InputsDataMap inputs = network.getInputsInfo(); - - DataPtr inputPtr1 = inputs["input1"]->getInputData(); - DataPtr inputPtr2 = inputs["input2"]->getInputData(); - - InferenceEngine::Blob::Ptr input1 = InferenceEngine::make_shared_blob(inputPtr1->getTensorDesc()); - input1->allocate(); - - InferenceEngine::Blob::Ptr input2 = InferenceEngine::make_shared_blob(inputPtr2->getTensorDesc()); - input2->allocate(); - - InferenceEngine::BlobMap inputBlobs; - inputBlobs["input1"] = input1; - inputBlobs["input2"] = input2; - - OutputsDataMap outputs = network.getOutputsInfo(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(outputs["prior"]->getTensorDesc()); - output->allocate(); - - InferenceEngine::BlobMap outputBlobs; - outputBlobs["prior"] = output; - - ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - inferRequest.SetInput(inputBlobs); - inferRequest.SetOutput(outputBlobs); - inferRequest.Infer(); - - // Check results - - const TBlob::Ptr outputArray = std::dynamic_pointer_cast>(output); - float* dst_ptr = outputArray->data(); - - // pick a few generated priors and compare against the expected number. 
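For the plain PriorBox checks above, the expected constants can be derived by hand from the instantiation parameters (a 10x10 feature map over a 100x100 image, min_size 4, max_size 9): the step is 100/10 = 10, so the first cell's prior is centred half a step in, at (5, 5). The min_size box then spans [3, 3, 7, 7]/100 = [0.03, 0.03, 0.07, 0.07]; the second box uses sqrt(min_size * max_size) = sqrt(4 * 9) = 6 and spans [2, 2, 8, 8]/100 = [0.02, 0.02, 0.08, 0.08]; and the cell in the 5th row and column (zero-based index 4) is centred at (45, 45), which yields the 0.43/0.47 corners probed at offset 4*10*2*4 + 4*2*4. The second half of the output, reached by the dst_ptr += out.h * out.w offset, holds the per-coordinate variances, each checked against 0.1.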
- // first prior - EXPECT_NEAR(dst_ptr[0], 0.03, 1e-6); - EXPECT_NEAR(dst_ptr[1], 0.03, 1e-6); - EXPECT_NEAR(dst_ptr[2], 0.07, 1e-6); - EXPECT_NEAR(dst_ptr[3], 0.07, 1e-6); - // second prior - EXPECT_NEAR(dst_ptr[4], 0.03, 0.1); - EXPECT_NEAR(dst_ptr[5], 0.03, 0.1); - EXPECT_NEAR(dst_ptr[6], 0.17, 0.1); - EXPECT_NEAR(dst_ptr[7], 0.03, 0.1); - // prior in the 5-th row and 5-th col - EXPECT_NEAR(dst_ptr[4 * 10 * 2 * 4 + 4 * 2 * 4], 0.83, 0.1); - EXPECT_NEAR(dst_ptr[4 * 10 * 2 * 4 + 4 * 2 * 4 + 1], 0.83, 0.1); - EXPECT_NEAR(dst_ptr[4 * 10 * 2 * 4 + 4 * 2 * 4 + 2], 0.84, 0.1); - EXPECT_NEAR(dst_ptr[4 * 10 * 2 * 4 + 4 * 2 * 4 + 3], 0.84, 0.1); - - // check variance - dst_ptr += p.out.h * p.out.w; - for (int d = 0; d < p.out.h * p.out.w; ++d) { - EXPECT_NEAR(dst_ptr[d], 0.1, 1e-6); - } - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPUPriorBoxDensityTest, TestsPriorBoxDensity) {} - -INSTANTIATE_TEST_CASE_P( - TestsPriorBoxDensity, smoke_CPUPriorBoxDensityTest, - ::testing::Values( - priorbox_test_params{ "CPU", - 10,{ 10, 10, 10 },{ 3, 100, 100 },{ 2, 1, 400 }, 0, 0, 4, 9, true, true })); - diff --git a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/region_yolo_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/region_yolo_tests.cpp deleted file mode 100644 index 9e998088948..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/single_layer_tests/region_yolo_tests.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include "common_test_utils/data_utils.hpp" - -using namespace ::testing; -using namespace InferenceEngine; - -struct region_yolo_test_params { - std::vector src_dims; - std::vector dst_dims; - int classes; - int coords; - int num; - float do_softmax; - std::vector mask; -}; - -static inline int entry_index(int width, int height, int coords, int classes, int outputs, int batch, int location, - int entry) { - int n = location / (width * height); - int loc = location % (width * height); - return batch * outputs + n * width * height * (coords + classes + 1) + - entry * width * height + loc; -} - -static inline float logistic_activate(float x) { - return 1.f / (1.f + exp(-x)); -} - -static inline -void softmax_generic(const float *src_data, float *dst_data, int B, int C, int H, int W) { - int start = 0; - for (int b = 0; b < B; b++) { - for (int i = start; i < H * W; i++) { - float max = src_data[b * C * H * W + i]; - for (int c = 0; c < C; c++) { - float val = src_data[b * C * H * W + c * H * W + i]; - if (val > max) max = val; - } - - float expSum = 0; - for (int c = 0; c < C; c++) { - dst_data[b * C * H * W + c * H * W + i] = exp(src_data[b * C * H * W + c * H * W + i] - max); - expSum += dst_data[b * C * H * W + c * H * W + i]; - } - - for (int c = 0; c < C; c++) { - dst_data[b * C * H * W + c * H * W + i] = dst_data[b * C * H * W + c * H * W + i] / expSum; - } - } - } -} - -static void ref_region_yolo(InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst, region_yolo_test_params p) { - float* src_data = src.data(); - float* dst_data = dst.data(); - - int mask_size = p.mask.size();; - - int IW = (src.getTensorDesc().getDims().size() > 3) ? src.getTensorDesc().getDims()[3] : 1; - int IH = (src.getTensorDesc().getDims().size() > 2) ? 
src.getTensorDesc().getDims()[2] : 1; - int B = (src.getTensorDesc().getDims().size() > 0) ? src.getTensorDesc().getDims()[0] : 1; - - for (int i = 0; i < src.size(); i++) { - dst_data[i] = src_data[i]; - } - - int end_index = 0; - int num_ = 0; - if (p.do_softmax) { - // Region layer (Yolo v2) - end_index = IW * IH; - num_ = p.num; - } else { - // Yolo layer (Yolo v3) - end_index = IW * IH * (p.classes + 1); - num_ = mask_size; - } - int inputs_size = IH * IW * num_ * (p.classes + p.coords + 1); - - for (int b = 0; b < B; b++) { - for (int n = 0; n < num_; n++) { - int index = entry_index(IW, IH, p.coords, p.classes, inputs_size, b, n * IW * IH, 0); - for (int i = index; i < index + 2 * IW * IH; i++) { - dst_data[i] = logistic_activate(dst_data[i]); - } - - index = entry_index(IW, IH, p.coords, p.classes, inputs_size, b, n * IW * IH, p.coords); - for (int i = index; i < index + end_index; i++) { - dst_data[i] = logistic_activate(dst_data[i]); - } - } - } - - if (p.do_softmax) { - int index = entry_index(IW, IH, p.coords, p.classes, inputs_size, 0, 0, p.coords + 1); - int batch_offset = inputs_size / p.num; - for (int b = 0; b < B * p.num; b++) - softmax_generic(src_data + index + b * batch_offset, dst_data + index + b * batch_offset, 1, p.classes, - IH, IW); - } -} - -class smoke_CPU_RegionYoloOnlyTest: public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - __SRC_DIMS__ - - - - - - - __SRC_DIMS__ - - - - __DST_DIMS__ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(region_yolo_test_params p) { - std::string model = model_t; - - - std::string src_dims; - for (auto &dim : p.src_dims) { - src_dims += "\n "; - src_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims); - - std::string dst_dims; - for (auto &dim : p.dst_dims) { - dst_dims += "\n "; - dst_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__DST_DIMS__", dst_dims); - - std::string mask; - for (auto &n : p.mask) { - mask += std::to_string(n) + ","; - } - mask.pop_back(); - REPLACE_WITH_STR(model, "_MASK_", mask); - - - REPLACE_WITH_STR(model, "_CLASSES_", std::to_string(p.classes)); - REPLACE_WITH_STR(model, "_COORDS_", std::to_string(p.coords)); - REPLACE_WITH_STR(model, "_DO_SOFTMAX_", std::to_string(p.do_softmax)); - REPLACE_WITH_STR(model, "_NUM_", std::to_string(p.num)); - - - return model; - } - - virtual void SetUp() { - try { - region_yolo_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork net = ie.ReadNetwork(model, Blob::CPtr()); - - Blob::Ptr src = make_shared_blob({Precision::FP32, p.src_dims, Layout::ANY}); - src->allocate(); - - TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - CommonTestUtils::fill_data_sine(src->buffer(), src->size(), 10, 30, 1); - - BlobMap srcs; - srcs.insert(std::pair("input", src)); - - OutputsDataMap out; - out = net.getOutputsInfo(); - BlobMap outputBlobs; - - std::pair item = *out.begin(); - - TBlob::Ptr output; - output = make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_region_yolo(*srcPtr, dst_ref, p); - - ExecutableNetwork exeNetwork = ie.LoadNetwork(net, "CPU"); - InferRequest inferRequest = exeNetwork.CreateInferRequest(); - inferRequest.SetInput(srcs); - inferRequest.SetOutput(outputBlobs); - 
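As a reading aid for the reference above: it assumes the activations are laid out as [batch][anchor][x, y, w, h, objectness, class scores][H*W], and entry_index() returns the flat offset of one such plane; the sigmoid is then applied to the x/y planes and, for the do_softmax == 0 (YOLOv3-style) cases instantiated below, to everything from objectness through the class scores. A small standalone sketch of the offsets for the 13x13, coords = 4, classes = 80, 3-anchor case; the assertions are illustrative only:

    #include <cassert>

    // Same formula as the deleted entry_index(): offset of plane `entry` for anchor n
    // at spatial position loc, inside a [num][coords + classes + 1][H*W] block.
    static int entry_index(int w, int h, int coords, int classes, int outputs,
                           int batch, int location, int entry) {
        const int n = location / (w * h);
        const int loc = location % (w * h);
        return batch * outputs + n * w * h * (coords + classes + 1) + entry * w * h + loc;
    }

    int main() {
        const int W = 13, H = 13, coords = 4, classes = 80, num = 3;
        const int outputs = W * H * num * (coords + classes + 1);   // 43095 values per image

        // Anchor 1 starts one full (4 + 80 + 1)-plane block after anchor 0;
        // its objectness plane sits `coords` planes further in.
        assert(entry_index(W, H, coords, classes, outputs, 0, 1 * W * H, 0) == 85 * W * H);
        assert(entry_index(W, H, coords, classes, outputs, 0, 1 * W * H, coords) == 89 * W * H);
        return 0;
    }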
inferRequest.Infer(); - - compare(*outputBlobs.begin()->second, dst_ref); - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(smoke_CPU_RegionYoloOnlyTest, TestsRegionYolo) {} - -INSTANTIATE_TEST_CASE_P( - TestsRegionYolo, smoke_CPU_RegionYoloOnlyTest, - ::testing::Values( - region_yolo_test_params{{1, 255, 52, 52}, {1, 255, 52, 52}, 80, 4, 9, 0, {0, 1, 2}}, - region_yolo_test_params{{1, 255, 26, 26}, {1, 255, 26, 26}, 80, 4, 9, 0, {3, 4, 5}}, - region_yolo_test_params{{1, 255, 13, 13}, {1, 255, 13, 13}, 80, 4, 9, 0, {6, 7, 8}}, - region_yolo_test_params{{1, 125, 13, 13}, {1, 21125}, 20, 4, 5, 1, {0, 1, 2}} - )); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/snippet_test/multi_out_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/snippet_test/multi_out_test.cpp deleted file mode 100644 index c94817cd87b..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/snippet_test/multi_out_test.cpp +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "common_test_utils/xml_net_builder/xml_net_builder.hpp" -#include "tests_common.hpp" -#include "precision_utils.h" -#include - -using namespace InferenceEngine; -using std::string; -using std::pair; -using std::map; -using std::vector; - -const static size_t _H = 16; -const static size_t _W = 16; -const static size_t _C = 1; -const static size_t _B = 2; - -const static SizeVector dims {_B, _C, _H, _W}; - -class MultiOutConnectNet : CommonTestUtils::V2NetBuilder { - std::string model; - TBlob::Ptr weightsPtr; - -public: - MultiOutConnectNet(): CommonTestUtils::V2NetBuilder(buildNetworkWithOneInput( - "MultiOutNet", {_B, 3*_C, _H, _W}, "FP32")) { - weightsPtr = make_shared_blob(TensorDesc(Precision::U8, SizeVector{0}, Layout::C)); - weightsPtr->allocate(); - - /** - * [in] - * | - * [__split__] - * | | | - * [out1] | [out2] - * |_______ - * | | - * [power1] [power2] - * | | - * [out3] [out4] - */ - addLayer("Split", "FP32", nullptr, - { {{_B, 3*_C, _H, _W}}, - {dims, dims, dims}}); - - map pow_params = { {"scale", "-1"}, {"shift", "0"}, {"power", "1"} }; - addLayer("Power", "FP32", &pow_params, - { {dims}, {dims} }); - - addLayer("Power", "FP32", &pow_params, - { {dims}, {dims} }); - - vector> edges = { - {"0,0", "1,1"}, - {"1,3", "2,5"}, - {"1,3", "3,7"} - }; - model = finish(&edges); - } - - CNNNetwork net(Core & ie) { - return ie.ReadNetwork(model, weightsPtr); - } -}; - -using test_param = std::tuple; - -class smoke_MultiOutConnectTest : public ::testing::TestWithParam { -protected: - string device_name; - MultiOutConnectNet topology; - - void SetUp() override { - device_name = std::get<0>(GetParam()); - } -}; - -static void fill_with(Blob::Ptr &blob, std::vector vals) { - float* ptr = blob->buffer().as(); - const size_t size = blob->size(); - const size_t fill_size = vals.size(); - - for (int i = 0; i < size; i++) - ptr[i] = vals[i%fill_size]; -} - -static bool check_with(Blob::Ptr &blob, std::vector vals) { - float* ptr = blob->buffer().as(); - const size_t size = blob->size(); - const size_t fill_size = vals.size(); - - bool res = true; - for (int i = 0; i < size; i++) - if (ptr[i] != vals[i%fill_size]) - res = false; - return res; -} - -TEST_P(smoke_MultiOutConnectTest, canLoad) { - Core ie; - CNNNetwork net = topology.net(ie); - - auto execNet = ie.LoadNetwork(net, device_name); - auto req = execNet.CreateInferRequest(); - - auto input = req.GetBlob("Input0"); - 
fill_with(input, {1,2,3,4}); - - req.Infer(); - - auto output1 = req.GetBlob("Power2"); - auto output2 = req.GetBlob("Power3"); - ASSERT_TRUE(check_with(output1, {-1,-2,-3,-4})); - ASSERT_TRUE(check_with(output2, {-1,-2,-3,-4})); -} - -#define PLUGING_CASE(_plugin, _test) \ - INSTANTIATE_TEST_CASE_P(_plugin##_run, _test, ::testing::Values(#_plugin) ) - -PLUGING_CASE(CPU, smoke_MultiOutConnectTest); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/snippet_test/tripple_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/snippet_test/tripple_test.cpp deleted file mode 100644 index cf544daac38..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/snippet_test/tripple_test.cpp +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "common_test_utils/xml_net_builder/xml_net_builder.hpp" -#include "tests_common.hpp" -#include "precision_utils.h" -#include - -using namespace InferenceEngine; -using std::string; -using std::pair; -using std::map; -using std::vector; - -const static size_t _H = 16; -const static size_t _W = 16; -const static size_t _C = 1; -const static size_t _B = 2; - -const static SizeVector dims {_B, _C, _H, _W}; - -class TripleConnectNet : CommonTestUtils::V2NetBuilder { - std::string model; - TBlob::Ptr weightsPtr; - -public: - TripleConnectNet(): CommonTestUtils::V2NetBuilder(buildNetworkWithOneInput( - "Triple_Net", {_B, _C, _H, _W}, "FP32")) { - weightsPtr = make_shared_blob(TensorDesc(Precision::U8, SizeVector{0}, Layout::C)); - weightsPtr->allocate(); - - /** - * [in] - * ___|___ - * | | | - * [0] [1] [2] - * [__Concat___] - * | - * [out] - */ - map lstm_params = {}; - addLayer("Concat", "FP32", - &lstm_params, - { // input dims - {dims, dims, dims}, - // output dims - {{_B, 3*_C, _H, _W}} - }); - - vector> edges = { - {"0,0", "1,1"}, - {"0,0", "1,2"}, - {"0,0", "1,3"} - }; - model = finish(&edges); - } - - CNNNetwork net(Core & ie) { - return ie.ReadNetwork(model, weightsPtr); - } -}; - -using test_param = std::tuple; - -class smoke_TripleConnectTest : public ::testing::TestWithParam { -protected: - string device_name; - TripleConnectNet topology; - - void SetUp() override { - device_name = std::get<0>(GetParam()); - } -}; - -static void fill_with(Blob::Ptr &blob, std::vector vals) { - float* ptr = blob->buffer().as(); - const size_t size = blob->size(); - const size_t fill_size = vals.size(); - - for (int i = 0; i < size; i++) - ptr[i] = vals[i%fill_size]; -} - -static bool check_with(Blob::Ptr &blob, std::vector vals) { - float* ptr = blob->buffer().as(); - const size_t size = blob->size(); - const size_t fill_size = vals.size(); - - bool res = true; - for (int i = 0; i < size; i++) - if (ptr[i] != vals[i%fill_size]) - res = false; - return res; -} - -TEST_P(smoke_TripleConnectTest, canLoad) { - Core ie; - CNNNetwork net = topology.net(ie); - - auto execNet = ie.LoadNetwork(net, device_name); - auto req = execNet.CreateInferRequest(); - - auto input = req.GetBlob("Input0"); - fill_with(input, {1,2,3,4}); - - req.Infer(); - - auto output = req.GetBlob("Concat1"); - ASSERT_TRUE(check_with(output, {1,2,3,4})); -} - -#define PLUGING_CASE(_plugin, _test) \ - INSTANTIATE_TEST_CASE_P(_plugin##_run, _test, ::testing::Values(#_plugin) ) - -PLUGING_CASE(CPU, smoke_TripleConnectTest); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/test_model_repo.cpp b/inference-engine/tests_deprecated/functional/mkldnn/test_model_repo.cpp 
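A short note on why the two snippet tests above expect those exact patterns (a sketch of the assumed layer semantics, not part of the deleted sources): the Power layers are parameterized with scale = -1, shift = 0, power = 1, so every element becomes (-1 * x + 0)^1 = -x and the {1, 2, 3, 4} fill comes back as {-1, -2, -3, -4}; the Concat network stacks the same input three times along the channel axis, so the repeating fill pattern is reproduced unchanged.

    #include <cmath>

    // Power layer semantics assumed by the checks: y = (scale * x + shift) ^ power.
    inline float power_ref(float x, float scale = -1.f, float shift = 0.f, float power = 1.f) {
        return std::pow(scale * x + shift, power);   // with these parameters: -x
    }
    // power_ref(1.f) == -1.f, power_ref(2.f) == -2.f, ... matching check_with({-1, -2, -3, -4}).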
deleted file mode 100644 index 97ab83c5c31..00000000000 --- a/inference-engine/tests_deprecated/functional/mkldnn/test_model_repo.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_model_repo.hpp" - -std::string get_model_repo() { - return "models:"; -}; - -const char* TestDataHelpers::getModelPathNonFatal() noexcept { - return TestDataHelpers::getModelPathNonFatalDefault(); -} - -std::string TestDataHelpers::get_data_path() { - return TestDataHelpers::get_data_path_default(); -} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/unit/CMakeLists.txt b/inference-engine/tests_deprecated/unit/CMakeLists.txt index 9746f06602f..333be83b579 100644 --- a/inference-engine/tests_deprecated/unit/CMakeLists.txt +++ b/inference-engine/tests_deprecated/unit/CMakeLists.txt @@ -52,29 +52,6 @@ if (ENABLE_GNA) endif() endif() -if (ENABLE_MKL_DNN) - if (GEMM STREQUAL "MKL") - add_definitions(-DUSE_MKL) - endif () - file(GLOB - MKLDNN_TESTS - engines/mkldnn/*.cpp - engines/mkldnn/graph/layers/extensions/*.cpp - engines/mkldnn/graph/layers/internal/*.cpp - engines/mkldnn/graph/structure/*.cpp - engines/mkldnn/graph/*.cpp) - file(GLOB - MKLDNN_TESTS_INCLUDE engines/mkldnn/graph/*.hpp) - - source_group("mkldnn" FILES ${MKLDNN_TESTS} ${MKLDNN_TESTS_INCLUDE}) - - include_directories(engines/mkldnn/graph) - - list(APPEND TEST_SRC ${MKLDNN_TESTS}) - list(APPEND TEST_INCLUDE ${MKLDNN_TESTS_INCLUDE}) - list(APPEND TEST_DEPS MKLDNNPlugin_obj) -endif () - if (ENABLE_MYRIAD) include(${XLINK_DIR}/XLink.cmake) diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/constant_propagation_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/constant_propagation_test.cpp deleted file mode 100644 index dfe0789481b..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/constant_propagation_test.cpp +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include "graph/test_graph.hpp" -#include - -using namespace ::testing; - -class ConstLayerImpl : public InferenceEngine::ILayerExecImpl { -public: - explicit ConstLayerImpl(const InferenceEngine::CNNLayer *layer): cnnLayer(*layer) {} - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = 0; - if (cnnLayer.outData.size() != 1 && cnnLayer.insData.size() != 1) - return InferenceEngine::GENERAL_ERROR; - InferenceEngine::DataConfig cfg; - cfg.constant = true; - cfg.inPlace = 0; - InferenceEngine::SizeVector order; - for(size_t i = 0; i < cnnLayer.outData[0]->getTensorDesc().getDims().size(); i++) { - order.push_back(i); - } - cfg.desc = InferenceEngine::TensorDesc(cnnLayer.outData[0]->getTensorDesc().getPrecision(), - cnnLayer.outData[0]->getTensorDesc().getDims(), - {cnnLayer.outData[0]->getTensorDesc().getDims(), order}); - config.outConfs.push_back(cfg); - config.inConfs.push_back(cfg); - conf.push_back(config); - return InferenceEngine::OK; - } - - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - value = cnnLayer.GetParamAsInt("const_val", 1); - if (config.dynBatchSupport) - return InferenceEngine::NOT_IMPLEMENTED; - for(auto input : config.inConfs) { - if (!input.constant) - return 
InferenceEngine::GENERAL_ERROR; - } - for(auto output : config.outConfs) { - if (!output.constant) - return InferenceEngine::GENERAL_ERROR; - } - return InferenceEngine::OK; - } - InferenceEngine::StatusCode execute(std::vector& inputs, std::vector& outputs, InferenceEngine::ResponseDesc *resp) noexcept override { - float *dst_data = outputs[0]->buffer(); - - size_t data_size = outputs[0]->size(); - for (size_t i = 0; i < data_size; i++) { - dst_data[i] = value; - } - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer cnnLayer; - int value = 0; -}; - -class ConstLayerFactory : public InferenceEngine::ILayerImplFactory { -public: - ConstLayerFactory(const InferenceEngine::CNNLayer *layer): cnnLayer(*layer) {} - // First implementation has more priority than next - InferenceEngine::StatusCode getImplementations(std::vector& impls, InferenceEngine::ResponseDesc *resp) noexcept override { - impls.push_back(InferenceEngine::ILayerImpl::Ptr(new ConstLayerImpl(&cnnLayer))); - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer cnnLayer; -}; - -using fake_ext_factory = std::function; - -class FakeConstExtensionFabric : public InferenceEngine::Extensions::Cpu::MKLDNNExtensions { -public: - FakeConstExtensionFabric() { - factories["ConstLayer"] = [](const InferenceEngine::CNNLayer * cnnLayer) -> InferenceEngine::ILayerImplFactory* { return new ConstLayerFactory(cnnLayer); }; - } - - virtual ~FakeConstExtensionFabric() { - factories.clear(); - } - - void GetVersion(const InferenceEngine::Version *&versionInfo) const noexcept override {} - void Unload() noexcept override {} - InferenceEngine::StatusCode getPrimitiveTypes(char**& types, unsigned int& size, InferenceEngine::ResponseDesc* resp) noexcept override { - types = new char *[factories.size()]; - size_t count = 0; - for (auto it = factories.begin(); it != factories.end(); it++, count ++) { - types[count] = new char[it->first.size() + 1]; - std::copy(it->first.begin(), it->first.end(), types[count]); - types[count][it->first.size() ] = '\0'; - } - return InferenceEngine::OK; - }; - InferenceEngine::StatusCode getFactoryFor(InferenceEngine::ILayerImplFactory *&factory, - const InferenceEngine::CNNLayer *cnnLayer, - InferenceEngine::ResponseDesc *resp) noexcept override { - if (factories.find(cnnLayer->type) == factories.end()) { - std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - return InferenceEngine::NOT_FOUND; - } - factory = factories[cnnLayer->type](cnnLayer); - return InferenceEngine::OK; - } - -private: - std::map factories; -}; - -class MKLDNNConstantPropagationTests: public TestsCommon { -protected: - virtual void SetUp() { - TestsCommon::SetUp(); - extension.reset(new FakeConstExtensionFabric()); - extMgr.reset(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - } - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr; - std::shared_ptr extension; -}; - -TEST_F(MKLDNNConstantPropagationTests, ConcatAfterConstLayers) { - std::string model = R"V0G0N( - - - - - - 1 - 2 - 10 - 5 - - - - - - - 1 - 2 - 5 - 5 - - - - - - - 1 - 2 - 10 - 5 - - - - - 1 - 2 - 10 - 5 - - - - - - - - 1 - 2 - 5 - 5 - - - - - 1 - 2 - 5 - 5 - - - - - - - - 1 - 2 - 10 - 5 - - - 1 - 2 - 5 - 5 - - - - - 1 - 2 - 15 - 5 - - - - - - - - - - - - )V0G0N"; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - 
MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - InferenceEngine::SizeVector dims_src1 = {1, 2, 10, 5}; - - InferenceEngine::Blob::Ptr src1 = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::NCHW}); - src1->allocate(); - - InferenceEngine::SizeVector dims_src2 = {1, 2, 5, 5}; - - InferenceEngine::Blob::Ptr src2 = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::NCHW}); - src2->allocate(); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - auto it = out.begin(); - - std::pair item = *it; - - InferenceEngine::TensorDesc outputDesc1 = item.second->getTensorDesc(); - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(outputDesc1); - output->allocate(); - outputBlobs[item.first] = output; - - auto& nodes = graph.getNodes(); - bool existConcat = false; - for (auto& node : nodes) { - if (node->getType() != MKLDNNPlugin::Concatenation && node->getType() != MKLDNNPlugin::Generic) - continue; - if (node->getName() == "con" && node->getType() == MKLDNNPlugin::Concatenation) - existConcat = true; - ASSERT_TRUE(node->isConstant()); - } - - ASSERT_TRUE(existConcat); - - graph.Infer(srcs, outputBlobs); - - // Compare - float *dst_ptr = output->buffer(); - - int len1 = 1, len2 = 1, cycles; - for (int dim = 2; dim < output->getTensorDesc().getDims().size(); dim++) { - len1 *= src1->getTensorDesc().getDims()[dim]; - len2 *= src2->getTensorDesc().getDims()[dim]; - } - cycles = 2; - - int index1 = 0, index2 = 0, index = 0; - for (int cycle = 0; cycle < cycles; cycle ++) { - for (int i1 = 0; i1 < len1; i1++) { - if (1 != dst_ptr[index]) { - FAIL() << "index: " << index << " src: " << 1 << ", dst: " << dst_ptr[index]; - } - index1++; index++; - } - for (int i2 = 0; i2 < len2; i2++) { - if (4 != dst_ptr[index]) { - FAIL() << "index: " << index << " src: " << 4 << ", dst: " << dst_ptr[index]; - } - index2++; index++; - } - } -} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/convert_desc_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/convert_desc_test.cpp deleted file mode 100644 index 89da83f8cf2..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/convert_desc_test.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include -#include - -#include - -#include - -namespace IE = InferenceEngine; -using Tag = mkldnn::memory::format_tag; -using RefDesc = mkldnn::impl::memory_desc_wrapper; -using MKLDNNPlugin::MKLDNNMemory; -using MKLDNNPlugin::MKLDNNMemoryDesc; - -TEST(TensorDescTests, checkOff) { - auto workload = std::vector>{ - {{5}, Tag::a}, - {{10, 3}, Tag::ab}, - {{5, 3}, Tag::ba}, - {{1, 3, 8, 8}, Tag::abcd}, - {{1, 3, 5, 2}, Tag::acdb}, - {{1, 24, 5, 7}, Tag::aBcd8b}, - {{2, 10, 3, 3}, Tag::aBcd8b}, - {{1, 3, 8, 8}, Tag::aBcd8b}, - {{1, 32, 8, 8}, Tag::aBcd16b}, - {{1, 32, 8, 8}, Tag::aBcd16b}, - {{2, 3, 5, 2, 1}, Tag::abcde}, - }; - - for (const auto &p : workload) { - mkldnn::memory::dims dims {p.first.begin(), p.first.end()}; - - const auto cpu_tDesc = MKLDNNMemoryDesc {dims, mkldnn::memory::data_type::f32, p.second}; - const auto ie_tDesc = IE::TensorDesc {cpu_tDesc}; - - mkldnn::memory::desc dnnl_tdesc = cpu_tDesc; - const RefDesc 
ref(dnnl_tdesc.data); - size_t total_size = cpu_tDesc.getDims().size(); - - for (size_t i = 0; i < total_size; i++) { - ASSERT_EQ(ie_tDesc.offset(i), ref.off_l(i)) << "Offset calculation are different"; - } - } -} - -TEST(TensorDescTests, convertToFrom) { - struct Param { IE::SizeVector dims, blk, ord; }; - auto workload = std::vector{ - {{5}, {5}, {0}}, - {{10, 3}, {10, 3}, {0, 1}}, - {{1, 3, 8, 8}, {1, 8, 8, 3}, {0, 2, 3, 1}}, - {{1, 3, 8, 8}, {1, 3, 8, 8}, {0, 1, 2, 3}}, - {{1, 8, 8, 8}, {1, 1, 8, 8, 8}, {0, 1, 2, 3, 1}}, - {{1, 32, 8, 8}, {1, 2, 8, 8, 16}, {0, 1, 2, 3, 1}}, - {{1, 3, 8}, {1, 3, 8}, {0, 1, 2}} - }; - - for (const auto &p : workload) { - const auto ie_tDesc = IE::TensorDesc(IE::Precision::FP32, p.dims, {p.blk, p.ord}); - const auto cpu_tDesc = MKLDNNMemoryDesc {ie_tDesc}; - - mkldnn::memory::desc dnnl_tdesc = cpu_tDesc; - const RefDesc ref(dnnl_tdesc.data); - size_t total_size = cpu_tDesc.getDims().size(); - - for (size_t i = 0; i < total_size; i++) { - ASSERT_EQ(ie_tDesc.offset(i), ref.off_l(i)) << "Offset calculation are different"; - } - } -} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/dummy.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/dummy.cpp new file mode 100644 index 00000000000..ffe853f7697 --- /dev/null +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/dummy.cpp @@ -0,0 +1,4 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/dump_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/dump_test.cpp deleted file mode 100644 index 9373b61a7c2..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/dump_test.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "ie_blob.h" -#include "blob_factory.hpp" -#include "utils/blob_dump.h" - -using namespace InferenceEngine; -using namespace MKLDNNPlugin; - -TEST(MKLDNNDumpTests, UnallocatedBlob_NoDump) { - SizeVector dims {2,3,4,5}; - Blob::Ptr blob = make_blob_with_precision({Precision::U8, dims, NHWC}); - - std::stringstream buff; - - EXPECT_THROW({ - BlobDumper(blob).dump(buff); - }, Exception); -} - -TEST(MKLDNNDumpTests, EmptyBlob_NoDump) { - SizeVector dims {2,3,4,5}; - Blob::Ptr blob; - - std::stringstream buff; - - EXPECT_THROW({ - BlobDumper(blob).dump(buff); - }, Exception); -} - -TEST(MKLDNNDumpTests, Ser) { - SizeVector dims {2,3,4,5}; - Blob::Ptr blob = make_blob_with_precision({Precision::U8, dims, NHWC}); - blob->allocate(); - - std::stringstream buff; - BlobDumper(blob).dump(buff); - - ASSERT_GT(buff.str().size(), blob->byteSize()); -} - -TEST(MKLDNNDumpTests, SerDeser) { - SizeVector dims {2,3,4,5}; - Blob::Ptr blob = make_blob_with_precision({Precision::U8, dims, NCHW}); - blob->allocate(); - - std::stringstream buff; - - BlobDumper(blob).dump(buff); - Blob::Ptr deser_blob = BlobDumper::read(buff).get(); - - ASSERT_EQ(deser_blob->getTensorDesc().getDims(), blob->getTensorDesc().getDims()); - ASSERT_EQ(deser_blob->getTensorDesc().getPrecision(), blob->getTensorDesc().getPrecision()); - - std::vector data(blob->buffer().as(), blob->buffer().as() + blob->size()); - std::vector deser_data(deser_blob->buffer().as(), deser_blob->buffer().as() - + deser_blob->size()); - ASSERT_EQ(deser_data, data); -} - -TEST(MKLDNNDumpTests, SerDeserWithScales) { - SizeVector dims {2,3,4,5}; - auto blob = make_blob_with_precision({Precision::U8, dims, 
NCHW}); - blob->allocate(); - - auto scls = make_blob_with_precision({Precision::FP32, {3}, C}); - scls->allocate(); - - std::stringstream buff; - - BlobDumper(blob).withScales(scls).dump(buff); - auto deser = BlobDumper::read(buff); - auto deser_blob = deser.get(); - auto deser_scls = deser.getScales(); - - ASSERT_EQ(deser_blob->getTensorDesc().getDims(), blob->getTensorDesc().getDims()); - ASSERT_EQ(deser_blob->getTensorDesc().getPrecision(), blob->getTensorDesc().getPrecision()); - - std::vector data(blob->buffer().as(), blob->buffer().as() + blob->size()); - std::vector deser_data(deser_blob->buffer().as(), deser_blob->buffer().as() - + deser_blob->size()); - ASSERT_EQ(deser_data, data); - - std::vector scls_data(scls->buffer().as(), scls->buffer().as() + scls->size()); - std::vector deser_scls_data(deser_scls->buffer().as(), deser_scls->buffer().as() - + deser_scls->size()); - ASSERT_EQ(deser_scls_data, scls_data); -} - - -TEST(MKLDNNDumpTests, SerU8AsTxt) { - SizeVector dims {2,3,4,5}; - - Blob::Ptr blob = make_blob_with_precision({Precision::U8, dims, NCHW}); - blob->allocate(); - - Blob::Ptr scls = make_blob_with_precision({Precision::FP32, {dims[1]}, C}); - scls->allocate(); - - std::stringstream buff; - BlobDumper(blob).withScales(scls).dumpAsTxt(buff); - - std::string deser_header, ref_header = "U8 4D shape: 2 3 4 5 (120)"; - std::getline(buff, deser_header); - deser_header = deser_header.substr(0, ref_header.length()); - ASSERT_EQ(deser_header, ref_header); - - auto num_line = std::count(std::istreambuf_iterator(buff), - std::istreambuf_iterator(), '\n'); - ASSERT_EQ(num_line, blob->size()); -} - -TEST(MKLDNNDumpTests, SerAsTxt) { - SizeVector dims {2,3}; - - Blob::Ptr blob = make_blob_with_precision({Precision::FP32, dims, NC}); - blob->allocate(); - - Blob::Ptr scls = make_blob_with_precision({Precision::FP32, {dims[1]}, C}); - scls->allocate(); - - std::stringstream buff; - BlobDumper(blob).withScales(scls).dumpAsTxt(buff); - - std::string deser_header, ref_header = "FP32 2D shape: 2 3 (6)"; - std::getline(buff, deser_header); - deser_header = deser_header.substr(0, ref_header.length()); - ASSERT_EQ(deser_header, ref_header); - - auto num_line = std::count(std::istreambuf_iterator(buff), - std::istreambuf_iterator(), '\n'); - ASSERT_EQ(num_line, blob->size()); -} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/dumper_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/dumper_test.cpp deleted file mode 100644 index 3c95375c969..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/dumper_test.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "mkldnn_graph.h" -#include "mkldnn_graph_dumper.h" -#include "ie_blob.h" -#include -#include "common_test_utils/xml_net_builder/xml_net_builder.hpp" -#include - -#include -#include - -using namespace InferenceEngine; -using namespace MKLDNNPlugin; -using std::string; -using std::map; - -class NetGen : CommonTestUtils::V2NetBuilder { - string model; - TBlob::Ptr weights; - -public: - NetGen(): CommonTestUtils::V2NetBuilder(buildNetworkWithOneInput( - "SomeNet", {2,3,16,16}, "FP32")) { - using prm_t = map; - - CommonTestUtils::InOutShapes inout = {{{2,3,16,16}},{{2,16,16,16}}}; - - prm_t conv_prm = { - {"stride-x", std::to_string(1)}, - {"stride-y", std::to_string(1)}, - {"pad-x", std::to_string(1)}, - {"pad-y", std::to_string(1)}, - {"kernel-x", std::to_string(3)}, 
- {"kernel-y", std::to_string(3)}, - {"output", std::to_string(16)}, - {"group", std::to_string(1)} - }; - size_t wght = 3*16*3*3*sizeof(float); - size_t bias = 16*sizeof(float); - - prm_t relu_prm = {{"negative_slope", std::to_string(0)}}; - - addLayer("Convolution", "FP32", &conv_prm, {{{2,3,16,16}},{{2,16,16,16}}}, wght, bias); - addLayer("Relu", "FP32", &relu_prm, {{{2,16,16,16}},{{2,16,16,16}}}); - - model = finish(); - - weights.reset(new TBlob({Precision::U8, {wght+bias}, C})); - weights->allocate(); - } - - CNNNetwork net() { - InferenceEngine::Core core; - return core.ReadNetwork(model, weights); - } -}; - -TEST(MKLDNNLayersTests, DumpSimpleGraph) { - auto net = NetGen().net(); - MKLDNNGraph graph; - MKLDNNExtensionManager::Ptr extMgr; - MKLDNNWeightsSharing::Ptr cache; - - graph.CreateGraph(net, extMgr, cache); - - auto dump_net = dump_graph_as_ie_net(graph); - auto layers = details::CNNNetSortTopologically(dump_net); - - ASSERT_EQ(layers.size(), 4); - ASSERT_EQ(layers[0]->type, "Input"); - ASSERT_EQ(layers[1]->type, "Convolution"); - ASSERT_EQ(layers[2]->type, "Reorder"); - ASSERT_EQ(layers[3]->type, "Output"); -} - -TEST(MKLDNNLayersTests, DumpSimpleGraphToDot) { - auto net = NetGen().net(); - MKLDNNGraph graph; - MKLDNNExtensionManager::Ptr extMgr; - MKLDNNWeightsSharing::Ptr cache; - graph.CreateGraph(net, extMgr, cache); - - std::stringstream buff; - dump_graph_as_dot(graph, buff); - - std::string dot = buff.str(); - std::cout << dot; - ASSERT_EQ(std::count(dot.begin(), dot.end(), '{'), 1); // 1-graph - ASSERT_EQ(std::count(dot.begin(), dot.end(), '}'), 1); - ASSERT_EQ(std::count(dot.begin(), dot.end(), '['), 10); // 4-node 3-data 3-shape - ASSERT_EQ(std::count(dot.begin(), dot.end(), ']'), 10); - ASSERT_EQ(std::count(dot.begin(), dot.end(), '>'), 6); // connection -} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp deleted file mode 100644 index 8f537758999..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct broadcast_test_params { - std::string shape_precision; - std::string precision; - InferenceEngine::SizeVector in_shape; - InferenceEngine::SizeVector out_shape; - - std::vector> comp; -}; - - -template -void ref_broadcast(InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst) { - size_t i; - const data_t *src_data = src.data(); - InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims(); - InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides(); - - if (!src_dims.size()) - src_dims = InferenceEngine::SizeVector(1, 1); - if (!srcStrides.size()) - srcStrides = InferenceEngine::SizeVector(1, 1); - data_t* dst_data = dst.data(); - InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims(); - InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides(); - - if (src_dims.size() > dst_dims.size()) - FAIL() << "Output tensor dimension is smaller then input tensor dimension"; - - size_t prefix_size = dst_dims.size() - src_dims.size(); - for (i = 
0; i < src_dims.size(); i++) { - if (src_dims[i] != 1 && src_dims[i] != dst_dims[i + prefix_size]) - FAIL() << "In/Output corresponding dimension must have the same value, or Input dimension is equal to 1"; - } - - InferenceEngine::SizeVector src_aligned(dst_dims.size()); - InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); - for (i = 0; i < dst_dims.size(); i++) { - if (i < prefix_size) { - src_aligned[i] = 1; - srcStrides_aligned[i] = srcStrides[0]; - } else { - src_aligned[i] = src_dims[i - prefix_size]; - srcStrides_aligned[i] = srcStrides[i - prefix_size]; - } - } - - size_t src_idx, work_amount_dst = dstStrides[0] * dst_dims[0]; - InferenceEngine::SizeVector counters(dst_dims.size(), 0); - - for (size_t iwork = 0; iwork < work_amount_dst; ++iwork) { - for (i = 0, src_idx = 0; i < dst_dims.size(); ++i) - src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0; - - dst_data[iwork] = src_data[src_idx]; - - for (int j = dst_dims.size() - 1; j >= 0; j--) { - counters[j] = (counters[j] + 1) % dst_dims[j]; - if (counters[j] != 0) break; - } - } -} - - -class MKLDNNCPUExtBroadcastTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - - - - - - - _DIM_SIZE_ - - - - - - - - _IN_ - - - _DIM_SIZE_ - - - - - _OUT_ - - - - - - - - - -)V0G0N"; - - std::string getModel(broadcast_test_params p) { - std::string model = model_t; - std::string in_shape = ""; - std::string out_shape; - - REPLACE_WITH_STR(model, "_IIDXP_", p.precision); - REPLACE_WITH_STR(model, "_ISDXP_", p.shape_precision); - for (size_t i = 0; i < p.in_shape.size(); i++) { - in_shape += ""; - in_shape += std::to_string(p.in_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_IN_", in_shape); - for (size_t i = 0; i < p.out_shape.size(); i++) { - out_shape += ""; - out_shape += std::to_string(p.out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_OUT_", out_shape); - REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.out_shape.size()); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - broadcast_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - // Input Data - InferenceEngine::Blob::Ptr dims; - InferenceEngine::SizeVector vector_dim(1, p.out_shape.size()); - if (p.shape_precision == "I32") { - dims = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, vector_dim, InferenceEngine::TensorDesc::getLayoutByDims(vector_dim) }); - dims->allocate(); - for (size_t i = 0; i < p.out_shape.size(); i++) { - static_cast(dims->buffer())[i] = static_cast(p.out_shape[i]); - } - auto * dimsPtr = dynamic_cast*>(dims.get()); - if (dimsPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - } else if (p.shape_precision == "FP32") { - dims = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, vector_dim, InferenceEngine::TensorDesc::getLayoutByDims(vector_dim) }); - dims->allocate(); - for (size_t i = 0; i < p.out_shape.size(); i++) { - static_cast(dims->buffer())[i] = static_cast(p.out_shape[i]); - } - auto * dimsPtr = dynamic_cast*>(dims.get()); - if (dimsPtr == 
nullptr) - FAIL() << "Cannot cast blob to TBlob."; - } - - InferenceEngine::BlobMap srcs; - InferenceEngine::Blob::Ptr src; - std::pair item = *out.begin(); - if (p.precision == "I32") { - src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::I32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape)}); - src->allocate(); - for (size_t i = 0; i < src->size(); i++) - static_cast(src->buffer())[i] = static_cast(i); - auto * srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("input", src)); - srcs.insert(std::pair("shape", dims)); - - // Output Blob - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_broadcast(*srcPtr, dst_ref); - - // Infer - graph.Infer(srcs, outputBlobs); - for (int i = 0; i < dst_ref.size(); i++) { - if (dst_ref.data()[i] != (*output).data()[i]) - FAIL() << "The difference between res_ptr[i] and ref_ptr[i]"; - } - } else if (p.precision == "FP32") { - src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape)}); - src->allocate(); - fill_data_dbgval(src->buffer(), src->size()); - auto * srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("input", src)); - srcs.insert(std::pair("shape", dims)); - - // Output Blob - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_broadcast(*srcPtr, dst_ref); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } - else { - return; - } - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtBroadcastTests, TestsBroadcast) {} - -INSTANTIATE_TEST_CASE_P( - TestsBroadcast, MKLDNNCPUExtBroadcastTests, - ::testing::Values( - // Params: shape_precision, precision, in_shape, out_shape - broadcast_test_params{ "I32", "I32",{},{ 2, 3, 4 } }, - broadcast_test_params{ "I32", "I32",{ 4, 1, 2 },{ 4, 2, 2 } }, - broadcast_test_params{ "I32", "I32",{ 4, 2, 1 },{ 4, 2, 2 } }, - broadcast_test_params{ "I32", "I32",{ 4, 2 },{ 2, 4, 2 } }, - broadcast_test_params{ "I32", "I32",{ 4, 1, 1 },{ 4, 2, 1 } }, - broadcast_test_params{ "I32", "I32",{ 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } }, - broadcast_test_params{ "I32","FP32",{},{ 2, 3, 4 } }, - broadcast_test_params{ "I32","FP32",{ 4, 1, 2 },{ 4, 2, 2 } }, - broadcast_test_params{ "I32","FP32",{ 4, 2, 1 },{ 4, 2, 2 } }, - broadcast_test_params{ "I32","FP32",{ 4, 2 },{ 2, 4, 2 } }, - broadcast_test_params{ "I32","FP32",{ 4, 1, 1 },{ 4, 2, 1 } }, - broadcast_test_params{ "I32","FP32", { 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } }, - broadcast_test_params{"FP32","FP32",{ 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/bucketize_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/bucketize_tests.cpp deleted file mode 100644 index f8b03c9fef6..00000000000 --- 
a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/bucketize_tests.cpp +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" -#include - -#include "single_layer_common.hpp" -#include "tests_common.hpp" - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct bucketize_test_params { - std::string model; - std::string precision; - std::string right; - - InferenceEngine::SizeVector input_shape; - std::vector input_value; - - bool with_second_input; - InferenceEngine::SizeVector boundaries_shape; - std::vector boundaries_value; - - InferenceEngine::SizeVector output_shape; - std::vector output_value_ref; - - size_t num_prim_desc; - int selectedType; - - std::vector> comp; -}; - -class MKLDNNCPUExtBucketizeTests : public TestsCommon, public WithParamInterface { - std::string getModel(bucketize_test_params p) { - std::string model = p.model; - - std::string input_shape; - std::string boundaries_shape; - std::string output_shape; - - for (auto& shape : p.input_shape) { - input_shape += ""; - input_shape += std::to_string(shape) + "\n"; - } - if (p.with_second_input) { - for (auto& shape : p.boundaries_shape) { - boundaries_shape += ""; - boundaries_shape += std::to_string(shape) + "\n"; - } - } - - for (auto& shape : p.output_shape) { - output_shape += ""; - output_shape += std::to_string(shape) + "\n"; - } - - REPLACE_WITH_STR(model, "_RIGHT_", p.right); - REPLACE_WITH_STR(model, "_INPUT_SHAPE_", input_shape); - REPLACE_WITH_STR(model, "_BOUNDARIES_SHAPE_", boundaries_shape); - REPLACE_WITH_STR(model, "_OUTPUT_SHAPE_", output_shape); - - return model; - } - -protected: - static void compare_int( - InferenceEngine::Blob &res, - InferenceEngine::Blob &ref, - int max_diff = 0, - const std::string assertDetails = "") { - int *res_ptr = res.buffer().as(); - size_t res_size = res.size(); - - int *ref_ptr = ref.buffer().as(); - size_t ref_size = ref.size(); - - ASSERT_EQ(res_size, ref_size) << assertDetails; - - for (size_t i = 0; i < ref_size; i++) { - ASSERT_EQ(res_ptr[i], ref_ptr[i]) << assertDetails; - } - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - bucketize_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "Bucketize") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - // prepare input blob and input blob map - InferenceEngine::Blob::Ptr input = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_shape) }); - input->allocate(); - auto *input_ptr = dynamic_cast*>(input.get()); - std::copy(p.input_value.begin(), p.input_value.end(), (float *)input_ptr->data()); 
- InferenceEngine::BlobMap input_blob_map; - input_blob_map["InputValues"] = input; - - InferenceEngine::Blob::Ptr boundaries = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.boundaries_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.boundaries_shape) }); - boundaries->allocate(); - auto *boundaries_ptr = dynamic_cast*>(boundaries.get()); - std::copy(p.boundaries_value.begin(), p.boundaries_value.end(), (float *)boundaries_ptr->data()); - input_blob_map["BoundariesValues"] = boundaries; - - // prepare output blob map - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - InferenceEngine::BlobMap output_blob_map; - for (auto iter = out.begin(); iter != out.end(); iter++) { - std::pair item = *iter; - InferenceEngine::Blob::Ptr output_blob_ptr = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output_blob_ptr->allocate(); - output_blob_map[item.first] = output_blob_ptr; - } - - // prepare blobs with reference data - InferenceEngine::Blob::Ptr output_blob_ref = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - p.output_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_shape) }); - output_blob_ref->allocate(); - auto *output_blob_ref_ptr = dynamic_cast*>(output_blob_ref.get()); - std::copy(p.output_value_ref.begin(), p.output_value_ref.end(), (int *)output_blob_ref_ptr->data()); - - // infer - graph.Infer(input_blob_map, output_blob_map); - - // check the result - auto iter = out.begin(); - compare_int(*output_blob_map[iter->first], *output_blob_ref, 0); - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtBucketizeTests, TestsBucketize) {} - -// model 1 that contains one Bucketize layer -std::string bucketize_model1 = R"V0G0N( - - - - - - _INPUT_SHAPE_ - - - - - - - _BOUNDARIES_SHAPE_ - - - - - - - - _INPUT_SHAPE_ - - - _BOUNDARIES_SHAPE_ - - - - - _OUTPUT_SHAPE_ - - - - - - - - - -)V0G0N"; - -// case 1 - the right attribute equal to False -InferenceEngine::SizeVector bucketize_input_shape_case1 = { 10 }; -std::vector bucketize_input_value_case1 = { 8.f, 1.f, 2.f, 1.f, 8.f, 5.f, 1.f, 5.f, 0.f, 20.f }; -std::string bucketize_right_case1 = "\"false\""; -bool bucketize_with_second_input_case1 = true; -InferenceEngine::SizeVector bucketize_boundaries_shape_case1 = { 4 }; -std::vector bucketize_boundaries_value_case1 = { 1.f, 4.f, 10.f, 20.f}; -InferenceEngine::SizeVector bucketize_output_shape_case1 = { 10 }; -std::vector bucketize_output_value_ref_case1 = { 2, 1, 1, 1, 2, 2, 1, 2, 0, 4 }; - -// case 2 - the right attribute equal to True -InferenceEngine::SizeVector bucketize_input_shape_case2 = { 10 }; -std::vector bucketize_input_value_case2 = { 8.f, 1.f, 2.f, 1.f, 8.f, 5.f, 1.f, 5.f, 0.f, 20.f }; -std::string bucketize_right_case2 = "\"true\""; -bool bucketize_with_second_input_case2 = true; -InferenceEngine::SizeVector bucketize_boundaries_shape_case2 = { 4 }; -std::vector bucketize_boundaries_value_case2 = { 1.f, 4.f, 10.f, 20.f }; -InferenceEngine::SizeVector bucketize_output_shape_case2 = { 10 }; -std::vector bucketize_output_value_ref_case2 = { 2, 0, 1, 0, 2, 2, 0, 2, 0, 3 }; - -INSTANTIATE_TEST_CASE_P( - TestsBucketize, MKLDNNCPUExtBucketizeTests, - ::testing::Values( - bucketize_test_params { - bucketize_model1, "I32", bucketize_right_case1, - bucketize_input_shape_case1, bucketize_input_value_case1, - bucketize_with_second_input_case1, bucketize_boundaries_shape_case1, bucketize_boundaries_value_case1, - bucketize_output_shape_case1, 
bucketize_output_value_ref_case1, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - bucketize_test_params{ - bucketize_model1, "I32", bucketize_right_case2, - bucketize_input_shape_case2, bucketize_input_value_case2, - bucketize_with_second_input_case2, bucketize_boundaries_shape_case2, bucketize_boundaries_value_case2, - bucketize_output_shape_case2, bucketize_output_value_ref_case2, - 1, MKLDNNPlugin::impl_desc_type::unknown - } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp deleted file mode 100644 index e59bbb0dbfa..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include "nodes/base.hpp" - -using namespace InferenceEngine; -using namespace Extensions; - -struct TestExtensionsHolder { - std::map list; -}; - - -class FakeExtensions : public Cpu::MKLDNNExtensions { - public: - void Unload() noexcept override {}; - - static std::shared_ptr GetExtensionsHolder() { - static std::shared_ptr localHolder; - if (localHolder == nullptr) { - localHolder = std::shared_ptr(new TestExtensionsHolder()); - } - return localHolder; - } - - static void AddExt(std::string name, Cpu::ext_factory factory) { - GetExtensionsHolder()->list[name] = factory; - } - - void GetVersion(const Version *&versionInfo) const noexcept override { - static Version ExtensionDescription = { - {2, 1}, // extension API version - "2.1", - "ie-cpu-ext" // extension description message - }; - - versionInfo = &ExtensionDescription; - } - - StatusCode getPrimitiveTypes(char **&types, unsigned int &size, ResponseDesc *resp) noexcept override { - collectTypes(types, size, GetExtensionsHolder()->list); - return OK; - }; - StatusCode getFactoryFor(ILayerImplFactory *&factory, const CNNLayer *cnnLayer, ResponseDesc *resp) noexcept override { - auto &factories = GetExtensionsHolder()->list; - if (factories.find(cnnLayer->type) == factories.end()) { - std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - return NOT_FOUND; - } - factory = factories[cnnLayer->type](cnnLayer); - return OK; - } - - template - void collectTypes(char **&types, unsigned int &size, const std::map &factories) { - types = new char *[factories.size()]; - unsigned count = 0; - for (auto it = factories.begin(); it != factories.end(); it++, count++) { - types[count] = new char[it->first.size() + 1]; - std::copy(it->first.begin(), it->first.end(), types[count]); - types[count][it->first.size()] = '\0'; - } - size = count; - } -}; - -class FakeLayerPLNImpl: public Cpu::ExtLayerBase { -public: - explicit FakeLayerPLNImpl(const CNNLayer* layer) { - try { - addConfig(layer, {{ConfLayout::PLN, false, 0}}, {{ConfLayout::PLN, false, 0}}); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - return OK; - } -}; - -class FakeLayerBLKImpl: public Cpu::ExtLayerBase { -public: - explicit FakeLayerBLKImpl(const CNNLayer* layer) { - try { -#if defined(HAVE_AVX512F) - auto blk_layout = ConfLayout::BLK16; -#else - auto blk_layout = ConfLayout::BLK8; -#endif - addConfig(layer, 
{{blk_layout, false, 0}}, {{blk_layout, false, 0}}); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - return OK; - } -}; - -template -class FakeRegisterBase { - public: - explicit FakeRegisterBase(const std::string& type) { - FakeExtensions::AddExt(type, - [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* { - return new Ext(layer); - }); - } -}; - -#define REG_FAKE_FACTORY_FOR(__prim, __type) \ -static FakeRegisterBase<__prim> __reg__##__type(#__type) - -REG_FAKE_FACTORY_FOR(Cpu::ImplFactory, FakeLayerPLN); -REG_FAKE_FACTORY_FOR(Cpu::ImplFactory, FakeLayerBLK); - - -InferenceEngine::IExtensionPtr make_FakeExtensions() { - return InferenceEngine::IExtensionPtr(new FakeExtensions()); -} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp deleted file mode 100644 index b3b44fb37ca..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct fill_test_params { - std::string precision; - InferenceEngine::SizeVector out_shape; - float value; - - std::vector> comp; -}; - -class MKLDNNCPUExtFillTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _DIM_SIZE_ - - - - - - - 1 - - - - - - - - _DIM_SIZE_ - - - 1 - - - - - _OUT_ - - - - - - - - - -)V0G0N"; - - std::string getModel(fill_test_params p) { - std::string model = model_t; - std::string out_shape; - - REPLACE_WITH_STR(model, "_IIDXP_", p.precision); - for (size_t i = 0; i < p.out_shape.size(); i++) { - out_shape += ""; - out_shape += std::to_string(p.out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_OUT_", out_shape); - REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.out_shape.size()); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - fill_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - // Input Data - InferenceEngine::Blob::Ptr dims; - InferenceEngine::SizeVector vector_dim(1, p.out_shape.size()); - dims = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, vector_dim, InferenceEngine::TensorDesc::getLayoutByDims(vector_dim) }); - dims->allocate(); - for (size_t i = 0; i < p.out_shape.size(); i++) { - static_cast(dims->buffer())[i] = static_cast(p.out_shape[i]); - } - auto * srcPtr = dynamic_cast*>(dims.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - InferenceEngine::Blob::Ptr value_scalar; - InferenceEngine::SizeVector value_scalar_dim(1, 1); - std::pair item = *out.begin(); - if (p.precision 
== "I32") { - value_scalar = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, value_scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(value_scalar_dim) }); - value_scalar->allocate(); - static_cast(value_scalar->buffer())[0] = static_cast(p.value); - auto * value_scalarPtr = dynamic_cast*>(value_scalar.get()); - if (value_scalarPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("dims", dims)); - srcs.insert(std::pair("value", value_scalar)); - - // Output Blob - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - std::fill_n(static_cast(dst_ref.data()), dst_ref.size(), static_cast(p.value)); - - // Infer - graph.Infer(srcs, outputBlobs); - for (int i = 0; i < dst_ref.size(); i++) { - if(dst_ref.data()[i] != (*output).data()[i]) - FAIL() << "The difference between res_ptr[i] and ref_ptr[i]"; - } - } else if (p.precision == "FP32") { - value_scalar = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, value_scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(value_scalar_dim) }); - value_scalar->allocate(); - static_cast(value_scalar->buffer())[0] = p.value; - auto * value_scalarPtr = dynamic_cast*>(value_scalar.get()); - if (value_scalarPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("dims", dims)); - srcs.insert(std::pair("value", value_scalar)); - - // Output Blob - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - std::fill_n(static_cast(dst_ref.data()), dst_ref.size(), p.value); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } else { - return; - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtFillTests, TestsFill) {} - -INSTANTIATE_TEST_CASE_P( - TestsFill, MKLDNNCPUExtFillTests, - ::testing::Values( -// Params: precision, value, out_shape - fill_test_params{ "I32", { 1 }, 1.f }, - fill_test_params{ "I32", { 1, 3, 1 }, 1.f }, - fill_test_params{ "I32", { 2, 3, 6 }, -1.f }, - fill_test_params{"FP32", { 2, 3, 6 }, -1.f }, - fill_test_params{"FP32", { 1 }, 1.f }, - fill_test_params{"FP32", { 1, 3, 1, 2 }, .5f }, - fill_test_params{"FP32", { 4, 3, 2, 5, 4, 2 }, .25f } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp deleted file mode 100644 index 42276c246f4..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp +++ /dev/null @@ -1,684 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct gather_test_params { - std::string inIdxPrecision; - InferenceEngine::SizeVector inDict; - InferenceEngine::SizeVector inIdx; - - int axis; - InferenceEngine::SizeVector out; - - 
size_t num_prim_desc; - int selectedType; - - std::vector> comp; -}; - -template -void ref_gather(InferenceEngine::TBlob &srcIdx, InferenceEngine::TBlob &srcDct, InferenceEngine::TBlob &dst, size_t axis) { - size_t i, j; - const data_t *src_dataIdx = srcIdx.data(); - float* src_dataDict = srcDct.data(); - float *dst_data = dst.data(); - size_t src_size = srcIdx.size(); - - std::vector dictionary_dims = srcDct.getTensorDesc().getDims(); - - // Find number of dictionaries, index range and data length - size_t numDictionaries = 1; - for (i = 0; i < axis; i++) - numDictionaries *= dictionary_dims[i]; - size_t indexRange = dictionary_dims[axis]; - size_t dataLength = 1; - for (i = axis + 1; i < dictionary_dims.size(); i++) - dataLength *= dictionary_dims[i]; - - // The gathering process - for (i = 0; i < src_size; i++) { - unsigned int idx = static_cast(src_dataIdx[i]); - - // Index clipping - if (idx < indexRange) { - // Copying data to destination from Dictionary - for (j = 0; j < numDictionaries; j++) { - memcpy(&dst_data[dataLength * (i + j * src_size)], - &src_dataDict[dataLength * (idx + j * indexRange)], sizeof(float) * dataLength); - } - } else { - for (j = 0; j < numDictionaries; j++) { - std::fill_n(&dst_data[dataLength * (i + j * src_size)], dataLength, 0.0f); - } - } - } -} - -class MKLDNNCPUExtGatherTests: public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IDICT_ - - - - - - - _IIDX_ - - - - - - - - _IDICT_ - - - _IIDX_ - - - - - _OUT_ - - - - - - - - - -)V0G0N"; - - std::string getModel(gather_test_params p) { - std::string model = model_t; - std::string inIdx = ""; - std::string inDict; - std::string out = ""; - - for (auto& idx : p.inIdx) { - inIdx += ""; - inIdx += std::to_string(idx) + "\n"; - } - - for (auto& dct : p.inDict) { - inDict += ""; - inDict += std::to_string(dct) + "\n"; - } - - for (auto& dst : p.out) { - out += ""; - out += std::to_string(dst) + "\n"; - } - - REPLACE_WITH_STR(model, "_IIDXP_", p.inIdxPrecision); - REPLACE_WITH_STR(model, "_IIDX_", inIdx); - REPLACE_WITH_STR(model, "_IDICT_", inDict); - REPLACE_WITH_NUM(model, "_AX_", p.axis); - REPLACE_WITH_STR(model, "_OUT_", out); - - return model; - } - - template - static void fill_data_dbgval(data_t *data, size_t size) { - for (size_t i = 0; i < size; i++) { - data[i] = static_cast(i & (sizeof(data_t) * 8 - 1)); - } - } -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - gather_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "gather") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - // Input Dictionary - InferenceEngine::Blob::Ptr srcDict = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.inDict, InferenceEngine::TensorDesc::getLayoutByDims(p.inDict) }); - 
srcDict->allocate(); - fill_data(srcDict->buffer(), srcDict->size()); - auto * srcDictPtr = dynamic_cast*>(srcDict.get()); - if (srcDictPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - // Input Indexes - InferenceEngine::Blob::Ptr srcIdx; - if (p.inIdxPrecision == "I32") { - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, p.inIdx, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdx) }); - srcIdx->allocate(); - fill_data_dbgval(static_cast(srcIdx->buffer()), srcIdx->size()); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - ref_gather(*srcIdxPtr, *srcDictPtr, dst_ref, p.axis); - } - else if (p.inIdxPrecision == "FP32") { - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.inIdx, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdx) }); - srcIdx->allocate(); - fill_data(srcIdx->buffer(), srcIdx->size()); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - ref_gather(*srcIdxPtr, *srcDictPtr, dst_ref, p.axis); - } - else if (p.inIdxPrecision == "U16") { - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::U16, p.inIdx, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdx) }); - srcIdx->allocate(); - fill_data_dbgval(static_cast(srcIdx->buffer()), srcIdx->size()); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - ref_gather(*srcIdxPtr, *srcDictPtr, dst_ref, p.axis); - } - else if (p.inIdxPrecision == "I16") { - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I16, p.inIdx, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdx) }); - srcIdx->allocate(); - fill_data_dbgval(static_cast(srcIdx->buffer()), srcIdx->size()); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - ref_gather(*srcIdxPtr, *srcDictPtr, dst_ref, p.axis); - } - else if (p.inIdxPrecision == "U8") { - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::U8, p.inIdx, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdx) }); - srcIdx->allocate(); - fill_data_dbgval(static_cast(srcIdx->buffer()), srcIdx->size()); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - ref_gather(*srcIdxPtr, *srcDictPtr, dst_ref, p.axis); - } - else if (p.inIdxPrecision == "I8") { - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I8, p.inIdx, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdx) }); - srcIdx->allocate(); - fill_data_dbgval(static_cast(srcIdx->buffer()), srcIdx->size()); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - ref_gather(*srcIdxPtr, *srcDictPtr, dst_ref, p.axis); - } - else { - 
return; - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("InputDictionary", srcDict)); - srcs.insert(std::pair("InputText", srcIdx)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtGatherTests, TestsGather) {} - -INSTANTIATE_TEST_CASE_P( - TestsGather, MKLDNNCPUExtGatherTests, - ::testing::Values( -// Params: inIdxPrecision, inDict, inIdx, axis, out, num_prim_desc, selectedType - gather_test_params{ "I32",{ 31 },{}, 0,{}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "FP32",{ 31 },{}, 0,{}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "FP32",{ 1, 31, 4 },{ 10 }, 1,{ 1, 10, 4 }, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "FP32",{ 31, 7 },{ 1,12,1 }, 0,{ 1, 12, 1, 7 }, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "FP32", {71, 16}, {1, 12, 256}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "I32", {71, 16}, {1, 12, 256}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "I32", {71, 16}, {12, 256}, 0, {12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "I32", {2, 5, 6}, {3, 4}, 0, {3, 4, 5, 6}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "I32", {5, 1}, {3, 4}, 0, {3, 4, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "FP32", {71, 16}, {1, 12, 256}, 1, {1, 71, 12, 256}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "I32", {2, 5, 6}, {1, 1, 3, 4}, 1, {2, 3, 4, 6}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "I32", {2, 5, 6}, {1, 1, 3, 4}, 2, {2, 5, 3, 4}, 1, MKLDNNPlugin::impl_desc_type::unknown }, - gather_test_params{ "I32", {6, 13, 10, 3}, {12, 4, 9, 8}, 1, {6, 12, 4, 9, 8, 10, 3}, 1, MKLDNNPlugin::impl_desc_type::unknown } - )); - - - - -struct gatherTF_test_params { - InferenceEngine::SizeVector dct_dim; - std::vector dct; - - InferenceEngine::SizeVector in_dim; - std::vector in; - - int axis; - - InferenceEngine::SizeVector ref_dim; - std::vector ref; - - std::vector> comp; -}; - -class MKLDNNCPUExtGatherTFTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IDICT_ - - - - - - - _IIDX_ - - - - - - - - _IDICT_ - - - _IIDX_ - - - - - _OUT_ - - - - - - - - - -)V0G0N"; - - std::string getModel(gatherTF_test_params p) { - std::string model = model_t; - std::string inIdx; - std::string inDict; - std::string out; - - for (auto& idx : p.in_dim) { - inIdx += ""; - inIdx += std::to_string(idx) + "\n"; - } - - for (auto& dct : p.dct_dim) { - inDict += ""; - inDict += std::to_string(dct) + "\n"; - } - - for (auto& dst : p.ref_dim) { - out += ""; - out += std::to_string(dst) + "\n"; - } - - REPLACE_WITH_STR(model, "_IIDX_", inIdx); - REPLACE_WITH_STR(model, "_IDICT_", inDict); - REPLACE_WITH_NUM(model, "_AX_", p.axis); - REPLACE_WITH_STR(model, "_OUT_", out); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - gatherTF_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Input Indexes - 
InferenceEngine::Blob::Ptr srcIdx; - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, p.in_dim, InferenceEngine::TensorDesc::getLayoutByDims(p.in_dim) }); - srcIdx->allocate(); - memcpy(static_cast(srcIdx->buffer()), &p.in[0], sizeof(int32_t)*p.in.size()); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Input Dictionary - InferenceEngine::Blob::Ptr srcDict = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.dct_dim, InferenceEngine::TensorDesc::getLayoutByDims(p.dct_dim) }); - srcDict->allocate(); - memcpy(srcDict->buffer(), &p.dct[0], sizeof(float)*p.dct.size()); - auto * srcDictPtr = dynamic_cast*>(srcDict.get()); - if (srcDictPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Infer - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("InputDictionary", srcDict)); - srcs.insert(std::pair("InputText", srcIdx)); - graph.Infer(srcs, outputBlobs); - - // Check results - if (memcmp((*output).data(), &p.ref[0], output->byteSize()) != 0) - FAIL() << "Wrong result with compare TF reference!"; - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtGatherTFTests, TestsGather) {} - -// Test data vectors -std::vector dict = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f }; -std::vector ref_in0_a0_d223 = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }; // 2x2x2x3 -std::vector ref_in0_a2_d232 = { 1.f, 2.f, 2.f, 1.f, 3.f, 4.f, 4.f, 3.f, 5.f, 6.f, 6.f, 5.f, 7.f, 8.f, 8.f, 7.f, 9.f, 10.f, 10.f, 9.f, 11.f, 12.f, 12.f, 11.f }; // 2x3x2x2 -std::vector ref_in1_a0_d322 = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f }; // 2x2x2x2 -std::vector ref_in1_a1_d232 = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f }; // 2x2x2x2 -std::vector ref_in1_a2_d223 = { 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f }; // 2x2x2x2 - -INSTANTIATE_TEST_CASE_P( - TestsGather, MKLDNNCPUExtGatherTFTests, - ::testing::Values( -// Params: dct_dim, dct, in_dim, in, axis, ref_dim, ref - gatherTF_test_params{ { 3,2 }, {1.0, 1.2, 2.3, 3.4, 4.5, 5.7 }, { 2, 2 }, { 0, 1, 1, 2 },0, { 2, 2, 2 }, {1.0, 1.2, 2.3, 3.4,2.3, 3.4,4.5, 5.7 } }, - gatherTF_test_params{ { 3,3 },{ 1.0, 1.2, 1.9,2.3, 3.4, 3.9,4.5, 5.7, 5.9 }, { 1, 2 }, { 0, 2 },1,{ 3, 2 },{ 1.0, 1.9,2.3, 3.9,4.5, 5.9 } }, - gatherTF_test_params{ { 2, 2, 3 }, dict, { 2, 2 }, { 0, 1, 1, 0 },0, { 2, 2, 2, 3 }, ref_in0_a0_d223 }, - gatherTF_test_params{ { 2, 2, 3 }, dict,{ 2, 2 }, { 0, 1, 1, 0 },-3, { 2, 2, 2, 3 }, ref_in0_a0_d223 }, - gatherTF_test_params{ { 2, 3, 2 }, dict, { 2, 2 }, { 0, 1, 1, 0 },2, { 2, 3, 2, 2 }, ref_in0_a2_d232 }, - gatherTF_test_params{ { 2, 3, 2 }, dict,{ 2, 2 }, { 0, 1, 1, 0 },-1, { 2, 3, 2, 2 }, ref_in0_a2_d232 }, - gatherTF_test_params{ { 3, 2, 2 }, dict,{ 2, 2 }, { 0, 1, 2, 1 }, 0, { 2, 2, 2, 2 }, ref_in1_a0_d322 }, - gatherTF_test_params{ { 3, 2, 2 }, dict,{ 2, 2 }, { 0, 1, 2, 1 },-3, { 2, 2, 2, 2 }, ref_in1_a0_d322 }, - 
gatherTF_test_params{ { 2, 3, 2 }, dict,{ 2, 2 }, { 0, 1, 2, 1 }, 1, { 2, 2, 2, 2 }, ref_in1_a1_d232 }, - gatherTF_test_params{ { 2, 3, 2 }, dict,{ 2, 2 }, { 0, 1, 2, 1 },-2, { 2, 2, 2, 2 }, ref_in1_a1_d232 }, - gatherTF_test_params{ { 2, 2, 3 }, dict,{ 2, 2 }, { 0, 1, 2, 1 }, 2, { 2, 2, 2, 2 }, ref_in1_a2_d223 }, - gatherTF_test_params{ { 2, 2, 3 }, dict,{ 2, 2 }, { 0, 1, 2, 1 },-1, { 2, 2, 2, 2 }, ref_in1_a2_d223 })); - - -class MKLDNNCPUExtGatherHolesTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - 3 - 2 - 2 - - - - - - - 2 - 2 - - - - - - - 2 - 1 - 2 - 2 - - - - - - - - 3 - 2 - 2 - - - 2 - 2 - - - - - 2 - 2 - 2 - 2 - - - - - - - - 2 - 2 - 2 - 2 - - - 2 - 1 - 2 - 2 - - - - - 2 - 3 - 2 - 2 - - - - - - - - - - - -)V0G0N"; - - std::string getModel(gatherTF_test_params p) { - std::string model = model_t; - std::string inIdx; - std::string inDict; - std::string out; - - for (auto& idx : p.in_dim) { - inIdx += ""; - inIdx += std::to_string(idx) + "\n"; - } - - for (auto& dct : p.dct_dim) { - inDict += ""; - inDict += std::to_string(dct) + "\n"; - } - - for (auto& dst : p.ref_dim) { - out += ""; - out += std::to_string(dst) + "\n"; - } - - REPLACE_WITH_STR(model, "_OUTC_", inIdx); - REPLACE_WITH_STR(model, "_IDICT_", inDict); - REPLACE_WITH_NUM(model, "_AX_", p.axis); - REPLACE_WITH_STR(model, "_OUT_", out); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - gatherTF_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Input Indexes - InferenceEngine::Blob::Ptr srcIdx; - int32_t in_size = 4; - InferenceEngine::SizeVector in_dim = {2, 2}; - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, in_dim, InferenceEngine::TensorDesc::getLayoutByDims(in_dim) }); - srcIdx->allocate(); - memcpy(static_cast(srcIdx->buffer()), &p.in[0], sizeof(int32_t)*in_size); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Input Dictionary - InferenceEngine::Blob::Ptr srcDict = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.dct_dim, InferenceEngine::TensorDesc::getLayoutByDims(p.dct_dim) }); - srcDict->allocate(); - memcpy(srcDict->buffer(), &p.dct[0], sizeof(float)*p.dct.size()); - auto * srcDictPtr = dynamic_cast*>(srcDict.get()); - if (srcDictPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Input3 - InferenceEngine::SizeVector src3_dim = { 2, 1, 2, 2 }; - InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, src3_dim, InferenceEngine::TensorDesc::getLayoutByDims(src3_dim) }); - src3->allocate(); - memcpy(src3->buffer(), &p.dct[0], sizeof(float) * src3_dim.size()); - auto* src3Ptr = dynamic_cast*>(src3.get()); - if (src3Ptr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Infer - 
InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("InputDictionary", srcDict)); - srcs.insert(std::pair("InputText", srcIdx)); - srcs.insert(std::pair("Input3", src3)); - graph.Infer(srcs, outputBlobs); - - // Check results - if (memcmp((*output).data(), &p.ref[0], 8 * sizeof(float)) != 0) - FAIL() << "Wrong result with compare TF reference!"; - if (memcmp(&((float*)(*output).data())[12], &p.ref[8], 8 * sizeof(float)) != 0) - FAIL() << "Wrong result with compare TF reference!"; - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtGatherHolesTests, TestsGather) {} - -INSTANTIATE_TEST_CASE_P( - TestsGather, MKLDNNCPUExtGatherHolesTests, - ::testing::Values( - // Params: dct_dim, dct, in_dim, in, axis, ref_dim, ref - gatherTF_test_params{ { 3, 2, 2 }, dict,{ 1, 5, 2, 2 },{ 0, 1, 2, 1 }, 1,{ 2, 2, 2, 2 }, ref_in1_a0_d322 })); - diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp deleted file mode 100644 index a46bb4f2d5a..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp +++ /dev/null @@ -1,1521 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include -#include -#include -#include "tests_common.hpp" - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -class FakeGenericPrimitiveImpl : public InferenceEngine::ILayerExecImpl { -public: - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - return InferenceEngine::OK; - } - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - return InferenceEngine::OK; - } - InferenceEngine::StatusCode execute(std::vector& inputs, std::vector& outputs, InferenceEngine::ResponseDesc *resp) noexcept override { - return InferenceEngine::NOT_IMPLEMENTED; - } -}; - -class FakeGenericPrimitiveFactory : public InferenceEngine::ILayerImplFactory { -public: - // First implementation has more priority than next - InferenceEngine::StatusCode getImplementations(std::vector& impls, InferenceEngine::ResponseDesc *resp) noexcept override { - impls.push_back(InferenceEngine::ILayerImpl::Ptr(new FakeGenericPrimitiveImpl())); - return InferenceEngine::OK; - } -}; - -class DoublePrimitiveImpl : public InferenceEngine::ILayerExecImpl { -public: - DoublePrimitiveImpl(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = true; - if (cnnLayer->outData.size() != 1 && cnnLayer->insData.size() != 1) - return InferenceEngine::GENERAL_ERROR; - InferenceEngine::DataConfig cfg; - cfg.constant = false; - cfg.inPlace = 0; - InferenceEngine::SizeVector order; - for(size_t i = 0; i < cnnLayer->outData[0]->getTensorDesc().getDims().size(); i++) { - order.push_back(i); - } - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->outData[0]->getTensorDesc().getPrecision(), - cnnLayer->outData[0]->getTensorDesc().getDims(), - {cnnLayer->outData[0]->getTensorDesc().getDims(), order}); - config.outConfs.push_back(cfg); - 
config.inConfs.push_back(cfg); - conf.push_back(config); - return InferenceEngine::OK; - } - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - for(auto input : config.inConfs) { - if (input.inPlace < 0) - return InferenceEngine::GENERAL_ERROR; - if (input.constant) - return InferenceEngine::GENERAL_ERROR; - } - for(auto output : config.outConfs) { - if (output.constant) - return InferenceEngine::GENERAL_ERROR; - } - return InferenceEngine::OK; - } - InferenceEngine::StatusCode execute(std::vector& inputs, std::vector& outputs, InferenceEngine::ResponseDesc *resp) noexcept override { - const float *src_data = inputs[0]->buffer(); - float *dst_data = outputs[0]->buffer(); - if (src_data != dst_data) - return InferenceEngine::GENERAL_ERROR; - - size_t data_size = inputs[0]->size(); - for (size_t i = 0; i < data_size; i++) { - dst_data[i] = src_data[i]*2; - } - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer* cnnLayer; -}; - -class ConstPrimitiveImpl : public InferenceEngine::ILayerExecImpl { -public: - ConstPrimitiveImpl(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = 0; - if (cnnLayer->outData.size() != 1 && cnnLayer->insData.size() != 1) - return InferenceEngine::GENERAL_ERROR; - InferenceEngine::DataConfig cfg; - cfg.constant = true; - // Cannot be in-place because memory will change a memory. - cfg.inPlace = -1; - InferenceEngine::SizeVector order; - for(size_t i = 0; i < cnnLayer->outData[0]->getTensorDesc().getDims().size(); i++) { - order.push_back(i); - } - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->outData[0]->getTensorDesc().getPrecision(), - cnnLayer->outData[0]->getTensorDesc().getDims(), - {cnnLayer->outData[0]->getTensorDesc().getDims(), order}); - config.outConfs.push_back(cfg); - config.inConfs.push_back(cfg); - conf.push_back(config); - return InferenceEngine::OK; - } - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - if (config.dynBatchSupport) - return InferenceEngine::NOT_IMPLEMENTED; - for(auto input : config.inConfs) { - if (input.inPlace >= 0) - return InferenceEngine::GENERAL_ERROR; - if (!input.constant) - return InferenceEngine::GENERAL_ERROR; - } - for(auto output : config.outConfs) { - if (output.inPlace >= 0) - return InferenceEngine::GENERAL_ERROR; - if (!output.constant) - return InferenceEngine::GENERAL_ERROR; - } - return InferenceEngine::OK; - } - InferenceEngine::StatusCode execute(std::vector& inputs, std::vector& outputs, InferenceEngine::ResponseDesc *resp) noexcept override { - float *dst_data = outputs[0]->buffer(); - - size_t data_size = outputs[0]->size(); - for (size_t i = 0; i < data_size; i++) { - dst_data[i] = 2; - } - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer *cnnLayer; -}; - -class ConstPrimitiveFactory : public InferenceEngine::ILayerImplFactory { -public: - ConstPrimitiveFactory(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - // First implementation has more priority than next - InferenceEngine::StatusCode getImplementations(std::vector& impls, InferenceEngine::ResponseDesc *resp) noexcept override { - impls.push_back(InferenceEngine::ILayerImpl::Ptr(new 
ConstPrimitiveImpl(cnnLayer))); - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer * cnnLayer; -}; - -class DoublePrimitiveFactory : public InferenceEngine::ILayerImplFactory { -public: - DoublePrimitiveFactory(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - // First implementation has more priority than next - InferenceEngine::StatusCode getImplementations(std::vector& impls, InferenceEngine::ResponseDesc *resp) noexcept override { - impls.push_back(InferenceEngine::ILayerImpl::Ptr(new DoublePrimitiveImpl(cnnLayer))); - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer * cnnLayer; -}; - -class TwoDifferentOutputsImpl : public InferenceEngine::ILayerExecImpl { -public: - TwoDifferentOutputsImpl(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = 0; - if (cnnLayer->outData.size() != 2 && cnnLayer->insData.size() != 1) - return InferenceEngine::GENERAL_ERROR; - InferenceEngine::DataConfig cfg; - cfg.constant = false; - cfg.inPlace = -1; - InferenceEngine::SizeVector order; - for(size_t i = 0; i < cnnLayer->outData[0]->getTensorDesc().getDims().size(); i++) { - order.push_back(i); - } - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->outData[0]->getTensorDesc().getPrecision(), - cnnLayer->outData[0]->getTensorDesc().getDims(), - {cnnLayer->outData[0]->getTensorDesc().getDims(), order}); - config.outConfs.push_back(cfg); - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->outData[1]->getTensorDesc().getPrecision(), - cnnLayer->outData[1]->getTensorDesc().getDims(), - {cnnLayer->outData[1]->getTensorDesc().getDims(), order}); - config.outConfs.push_back(cfg); - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->insData[0].lock()->getTensorDesc().getPrecision(), - cnnLayer->insData[0].lock()->getTensorDesc().getDims(), - {cnnLayer->insData[0].lock()->getTensorDesc().getDims(), order}); - config.inConfs.push_back(cfg); - conf.push_back(config); - return InferenceEngine::OK; - } - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - if (config.dynBatchSupport) - return InferenceEngine::NOT_IMPLEMENTED; - for(auto input : config.inConfs) { - if (input.inPlace >= 0) - return InferenceEngine::GENERAL_ERROR; - if (input.constant) - return InferenceEngine::GENERAL_ERROR; - } - for(auto output : config.outConfs) { - if (output.inPlace >= 0) - return InferenceEngine::GENERAL_ERROR; - if (output.constant) - return InferenceEngine::GENERAL_ERROR; - } - return InferenceEngine::OK; - } - InferenceEngine::StatusCode execute(std::vector& inputs, std::vector& outputs, InferenceEngine::ResponseDesc *resp) noexcept override { - const float *src_data = inputs[0]->buffer(); - float *dst_data0 = outputs[0]->buffer(); - float *dst_data1 = outputs[1]->buffer(); - - size_t out_data_size0 = outputs[0]->size(); - size_t out_data_size1 = outputs[1]->size(); - for (size_t i = 0; i < out_data_size0; i++) { - dst_data0[i] = (*(src_data++))*2; - } - - for (size_t i = 0; i < out_data_size1; i++) { - dst_data1[i] = (*(src_data++))*3; - } - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer* cnnLayer; -}; - -class TwoDifferentOutputsFactory : public InferenceEngine::ILayerImplFactory { -public: - TwoDifferentOutputsFactory(const 
InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - // First implementation has more priority than next - InferenceEngine::StatusCode getImplementations(std::vector& impls, InferenceEngine::ResponseDesc *resp) noexcept override { - impls.push_back(InferenceEngine::ILayerImpl::Ptr(new TwoDifferentOutputsImpl(cnnLayer))); - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer * cnnLayer; -}; - -class CustomConcatImpl : public InferenceEngine::ILayerExecImpl { -public: - CustomConcatImpl(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = 0; - if (cnnLayer->outData.size() != 1 && cnnLayer->insData.size() != 2) - return InferenceEngine::GENERAL_ERROR; - InferenceEngine::DataConfig cfg; - cfg.constant = false; - cfg.inPlace = -1; - InferenceEngine::SizeVector order; - for(size_t i = 0; i < cnnLayer->outData[0]->getTensorDesc().getDims().size(); i++) { - order.push_back(i); - } - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->outData[0]->getTensorDesc().getPrecision(), - cnnLayer->outData[0]->getTensorDesc().getDims(), - {cnnLayer->outData[0]->getTensorDesc().getDims(), order}); - config.outConfs.push_back(cfg); - cfg.inPlace = 0; - InferenceEngine::SizeVector dims = cnnLayer->insData[0].lock()->getTensorDesc().getDims(); - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->insData[0].lock()->getTensorDesc().getPrecision(), - dims, {dims, order}); - size_t dataSize = std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies()); - config.inConfs.push_back(cfg); - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->insData[1].lock()->getTensorDesc().getPrecision(), - cnnLayer->insData[1].lock()->getTensorDesc().getDims(), - {cnnLayer->insData[1].lock()->getTensorDesc().getDims(), order, - dataSize}); - config.inConfs.push_back(cfg); - conf.push_back(config); - return InferenceEngine::OK; - } - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - if (config.dynBatchSupport) - return InferenceEngine::NOT_IMPLEMENTED; - for(auto input : config.inConfs) { - if (input.inPlace < 0) - return InferenceEngine::GENERAL_ERROR; - if (input.constant) - return InferenceEngine::GENERAL_ERROR; - } - for(auto output : config.outConfs) { - if (output.inPlace >= 0) - return InferenceEngine::GENERAL_ERROR; - if (output.constant) - return InferenceEngine::GENERAL_ERROR; - } - return InferenceEngine::OK; - } - - InferenceEngine::StatusCode execute(std::vector& inputs, - std::vector& outputs, - InferenceEngine::ResponseDesc *resp) noexcept override { - return InferenceEngine::OK; - } -private: - InferenceEngine::CNNLayer * cnnLayer; -}; - -class CustomConcatFactory : public InferenceEngine::ILayerImplFactory { -public: - CustomConcatFactory(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - // First implementation has more priority than next - InferenceEngine::StatusCode getImplementations(std::vector& impls, InferenceEngine::ResponseDesc *resp) noexcept override { - impls.push_back(InferenceEngine::ILayerImpl::Ptr(new CustomConcatImpl(cnnLayer))); - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer * cnnLayer; -}; - -class CustomSplitImpl : public InferenceEngine::ILayerExecImpl { -public: - CustomSplitImpl(const 
InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = 0; - if (cnnLayer->outData.size() != 2 && cnnLayer->insData.size() != 1) - return InferenceEngine::GENERAL_ERROR; - InferenceEngine::DataConfig cfg; - cfg.constant = false; - cfg.inPlace = 0; - InferenceEngine::SizeVector order; - for(size_t i = 0; i < cnnLayer->outData[0]->getTensorDesc().getDims().size(); i++) { - order.push_back(i); - } - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->outData[0]->getTensorDesc().getPrecision(), - cnnLayer->outData[0]->getTensorDesc().getDims(), - {cnnLayer->outData[0]->getTensorDesc().getDims(), order}); - config.outConfs.push_back(cfg); - size_t dataSize = std::accumulate(std::begin(cnnLayer->outData[0]->getTensorDesc().getDims()), - std::end(cnnLayer->outData[0]->getTensorDesc().getDims()), - (size_t) 1, std::multiplies()); - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->outData[1]->getTensorDesc().getPrecision(), - cnnLayer->outData[1]->getTensorDesc().getDims(), - {cnnLayer->outData[1]->getTensorDesc().getDims(), order, dataSize}); - config.outConfs.push_back(cfg); - cfg.inPlace = -1; - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->insData[0].lock()->getTensorDesc().getPrecision(), - cnnLayer->insData[0].lock()->getTensorDesc().getDims(), - {cnnLayer->insData[0].lock()->getTensorDesc().getDims(), order}); - config.inConfs.push_back(cfg); - conf.push_back(config); - return InferenceEngine::OK; - } - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - if (config.dynBatchSupport) - return InferenceEngine::NOT_IMPLEMENTED; - for(auto input : config.inConfs) { - if (!input.inPlace) - return InferenceEngine::GENERAL_ERROR; - if (input.constant) - return InferenceEngine::GENERAL_ERROR; - } - for(auto output : config.outConfs) { - if (output.constant) - return InferenceEngine::GENERAL_ERROR; - } - return InferenceEngine::OK; - } - InferenceEngine::StatusCode execute(std::vector& inputs, - std::vector& outputs, - InferenceEngine::ResponseDesc *resp) noexcept override { - return InferenceEngine::OK; - } -private: - InferenceEngine::CNNLayer * cnnLayer; -}; - -class CustomSplitFactory : public InferenceEngine::ILayerImplFactory { -public: - CustomSplitFactory(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - // First implementation has more priority than next - InferenceEngine::StatusCode getImplementations(std::vector& impls, InferenceEngine::ResponseDesc *resp) noexcept override { - impls.push_back(InferenceEngine::ILayerImpl::Ptr(new CustomSplitImpl(cnnLayer))); - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer * cnnLayer; -}; -using fake_ext_factory = std::function; - -class FakeExtensionFabric : public InferenceEngine::Extensions::Cpu::MKLDNNExtensions { -public: - FakeExtensionFabric() { - factories["CustomNewConvolution"] = [](const InferenceEngine::CNNLayer * cnnLayer) -> InferenceEngine::ILayerImplFactory* { return new FakeGenericPrimitiveFactory(); }; - factories["NewDoubleLayer"] = [](const InferenceEngine::CNNLayer * cnnLayer) -> InferenceEngine::ILayerImplFactory* { return new DoublePrimitiveFactory(cnnLayer); }; - factories["NewTwoDifferentOutputs"] = [](const InferenceEngine::CNNLayer * cnnLayer) -> InferenceEngine::ILayerImplFactory* { 
return new TwoDifferentOutputsFactory(cnnLayer); }; - factories["ConstPrim"] = [](const InferenceEngine::CNNLayer * cnnLayer) -> InferenceEngine::ILayerImplFactory* { return new ConstPrimitiveFactory(cnnLayer); }; - factories["CustomInPlaceConcat"] = [](const InferenceEngine::CNNLayer * cnnLayer) -> InferenceEngine::ILayerImplFactory* { return new CustomConcatFactory(cnnLayer); }; - factories["CustomInPlaceSplit"] = [](const InferenceEngine::CNNLayer * cnnLayer) -> InferenceEngine::ILayerImplFactory* { return new CustomSplitFactory(cnnLayer); }; - } - - virtual ~FakeExtensionFabric() { - factories.clear(); - } - - void GetVersion(const InferenceEngine::Version *&versionInfo) const noexcept override {} - void Unload() noexcept override {} - InferenceEngine::StatusCode getPrimitiveTypes(char**& types, unsigned int& size, InferenceEngine::ResponseDesc* resp) noexcept override { - types = new char *[factories.size()]; - size_t count = 0; - for (auto it = factories.begin(); it != factories.end(); it++, count ++) { - types[count] = new char[it->first.size() + 1]; - std::copy(it->first.begin(), it->first.end(), types[count]); - types[count][it->first.size() ] = '\0'; - } - return InferenceEngine::OK; - }; - InferenceEngine::StatusCode getFactoryFor(InferenceEngine::ILayerImplFactory *&factory, - const InferenceEngine::CNNLayer *cnnLayer, - InferenceEngine::ResponseDesc *resp) noexcept override { - if (factories.find(cnnLayer->type) == factories.end()) { - std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - return InferenceEngine::NOT_FOUND; - } - factory = factories[cnnLayer->type](cnnLayer); - return InferenceEngine::OK; - } - -private: - std::map factories; -}; - -class MKLDNNGraphGenericTests: public TestsCommon { -protected: - virtual void SetUp() { - TestsCommon::SetUp(); - extension.reset(new FakeExtensionFabric()); - } - std::shared_ptr extension; -}; - -TEST_F(MKLDNNGraphGenericTests, canGetPrimitiveDescriptorsList) { - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - std::shared_ptr node; - InferenceEngine::DataPtr dataPtr; - dataPtr.reset(new InferenceEngine::Data("test", { InferenceEngine::Precision::FP32, {5, 4, 3, 1}, InferenceEngine::Layout::NCHW })); - InferenceEngine::CNNLayerPtr layerPtr; - layerPtr.reset(new InferenceEngine::CNNLayer({"name", "CustomNewConvolution", InferenceEngine::Precision::FP32})); - layerPtr->outData.push_back(dataPtr); - - mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0)); - MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache; - node.reset(MKLDNNPlugin::MKLDNNNode::factory().create(layerPtr, eng, extMgr, cache)); - ASSERT_EQ(MKLDNNPlugin::Type::Generic, node->getType()); - - ASSERT_NO_THROW(node->getSupportedDescriptors()); -} - -template -void ref_double(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst) { - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (int i=0; i < src.size(); i++) - dst_data[i] = src_data[i]*2; -} - -template -void ref_double_batch1(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst) { - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (int i= 0; i < src.size() / 2; i++) - dst_data[i] = src_data[i]*2; - - for (int i= src.size() / 2; i < src.size(); i++) - dst_data[i] = 0; -} - -template -void ref_twoDifferent(const InferenceEngine::TBlob &src, InferenceEngine::TBlob 
&dst1, InferenceEngine::TBlob &dst2) { - const data_t *src_data = src.readOnly(); - data_t *dst_data1 = dst1.data(); - data_t *dst_data2 = dst2.data(); - - for (int i=0; i < dst1.size(); i++) - dst_data1[i] = (*(src_data++))*2; - - for (int i=0; i < dst2.size(); i++) - dst_data2[i] = (*(src_data++))*6; -} - -TEST_F(MKLDNNGraphGenericTests, DontCreateGPUGenericPrimitive) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 5 - 5 - - - - - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - - )V0G0N"; - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - ASSERT_THROW(graph.CreateGraph(network, extMgr), InferenceEngine::Exception); -} - -TEST_F(MKLDNNGraphGenericTests, ExecuteConstGenericPrimitive) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 5 - 5 - - - - - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - - )V0G0N"; - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - InferenceEngine::SizeVector dims_src = {1, 3, 5, 5}; - - InferenceEngine::Blob::Ptr src = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - float * dst_data = dst_ref.buffer(); - for (size_t i = 0; i < dst_ref.size(); i++) { - dst_data[i] = 2; - } - - compare(*output, dst_ref); -} - -TEST_F(MKLDNNGraphGenericTests, ExecuteGenericPrimitive) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 5 - 5 - - - - - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - - )V0G0N"; - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - InferenceEngine::SizeVector dims_src = {1, 3, 5, 5}; - - InferenceEngine::Blob::Ptr src = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - 
srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_double(*srcPtr, dst_ref); - - compare(*output, dst_ref); -} - -TEST_F(MKLDNNGraphGenericTests, ExecuteGenericPrimitiveWithTwoOutputs) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 5 - 5 - - - - - - - 1 - 3 - 5 - 5 - - - - - 1 - 1 - 5 - 5 - - - 1 - 2 - 5 - 5 - - - - - - - - 1 - 2 - 5 - 5 - - - - - 1 - 2 - 5 - 5 - - - - - - - - 1 - 1 - 5 - 5 - - - 1 - 2 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - - - - - )V0G0N"; - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - InferenceEngine::SizeVector dims_src = {1, 3, 5, 5}; - - InferenceEngine::Blob::Ptr src = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - - float * data_src = src->buffer(); - for (size_t i = 0; i < src->size(); i++) - data_src[i] = 1; - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - auto it = out.begin(); - - std::pair item = *it; - InferenceEngine::DataPtr data1 = item.second; - - InferenceEngine::TensorDesc outputDesc1 = item.second->getTensorDesc(); - InferenceEngine::TBlob::Ptr output1; - output1 = InferenceEngine::make_shared_blob(outputDesc1); - output1->allocate(); - outputBlobs[item.first] = output1; - - graph.Infer(srcs, outputBlobs); - - float * data = outputBlobs.begin()->second->buffer(); - for (size_t i = 0; i < 25; i++) { - ASSERT_EQ(*data, 2); - data++; - } - for (size_t i = 0; i < 50; i++) { - ASSERT_EQ(*data, 6); - data++; - } -} - -TEST_F(MKLDNNGraphGenericTests, ExecuteGenericInPlaceConcat) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 5 - 5 - - - - - - - 1 - 2 - 5 - 5 - - - - - - - 1 - 3 - 5 - 5 - - - 1 - 2 - 5 - 5 - - - - - 1 - 5 - 5 - 5 - - - - - - - - - - )V0G0N"; - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - InferenceEngine::SizeVector dims_src1 = {1, 3, 5, 5}; - - InferenceEngine::Blob::Ptr src1 = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::NCHW}); - src1->allocate(); - - float * data_src1 = src1->buffer(); - for (size_t i = 0; i < src1->size(); i++) - data_src1[i] = 1; - - InferenceEngine::SizeVector dims_src2 = {1, 2, 5, 5}; - - InferenceEngine::Blob::Ptr src2 = - 
InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::NCHW}); - src2->allocate(); - - float * data_src2 = src2->buffer(); - for (size_t i = 0; i < src2->size(); i++) - data_src2[i] = 2; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - auto it = out.begin(); - - std::pair item = *it; - - InferenceEngine::TensorDesc outputDesc1 = item.second->getTensorDesc(); - InferenceEngine::TBlob::Ptr output1; - output1 = InferenceEngine::make_shared_blob(outputDesc1); - output1->allocate(); - outputBlobs[item.first] = output1; - - graph.Infer(srcs, outputBlobs); - - float * data = outputBlobs.begin()->second->buffer(); - for (size_t i = 0; i < 75; i++) { - ASSERT_EQ(*data, 1); - data++; - } - for (size_t i = 0; i < 50; i++) { - ASSERT_EQ(*data, 2); - data++; - } -} - -TEST_F(MKLDNNGraphGenericTests, ExecuteGenericInPlaceSplit) { - std::string model = R"V0G0N( - - - - - - 1 - 4 - 4 - 4 - - - - - - - 1 - 4 - 4 - 4 - - - - - 1 - 2 - 4 - 4 - - - 1 - 2 - 4 - 4 - - - - - - - - 1 - 2 - 4 - 4 - - - - - 1 - 2 - 4 - 4 - - - - - - - - 1 - 2 - 4 - 4 - - - - - 1 - 2 - 4 - 4 - - - - - - - - - - - )V0G0N"; - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - InferenceEngine::SizeVector dims_src = {1, 4, 4, 4}; - - InferenceEngine::Blob::Ptr src = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - - float * data_src = src->buffer(); - for (size_t i = 0; i < src->size(); i++) { - if (i < src->size() / 2) - data_src[i] = 1; - else - data_src[i] = 2; - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - auto it = out.begin(); - - std::pair item = *it; - - InferenceEngine::TBlob::Ptr output1; - output1 = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output1->allocate(); - outputBlobs[item.first] = output1; - - item = *(++it); - InferenceEngine::TBlob::Ptr output2; - output2 = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output2->allocate(); - outputBlobs[item.first] = output2; - - graph.Infer(srcs, outputBlobs); - - float * data = output1->buffer(); - for (size_t i = 0; i < output1->size(); i++) { - ASSERT_EQ(*data, 4); - data++; - } - data = output2->buffer(); - for (size_t i = 0; i < output2->size(); i++) { - ASSERT_EQ(*data, 4); - data++; - } -} - -TEST_F(MKLDNNGraphGenericTests, ExecuteGenericPrimitiveWithDynamicBatch) { - std::string model = R"V0G0N( - - - - - - 2 - 3 - 5 - 5 - - - - - - - 2 - 3 - 5 - 5 - - - - - 2 - 3 - 5 - 5 - - - - - - - - - )V0G0N"; - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - InferenceEngine::SizeVector 
dims_src = {2, 3, 5, 5}; - - InferenceEngine::Blob::Ptr src = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - float *dstData = output->data(); - - for (size_t i = 0; i < output->size(); i++) { - dstData[i] = 0; - } - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_double(*srcPtr, dst_ref); - - compare(*output, dst_ref); - - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "1"}}); - - for (size_t i = 0; i < output->size(); i++) { - dstData[i] = 0; - } - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref2(item.second->getTensorDesc()); - dst_ref2.allocate(); - - ref_double_batch1(*srcPtr, dst_ref2); - - compare(*output, dst_ref2); -} - -TEST_F(MKLDNNGraphGenericTests, ExecuteNotInLineGRN) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - 1 - 3 - 2 - 2 - - - - - 1 - 6 - 2 - 2 - - - - - - - - - - -)V0G0N"; - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {1, 3, 2, 2}; - - InferenceEngine::Blob::Ptr src = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - std::vector refDst = {0.000f, 0.503f, 0.659f, 0.117f, -0.474f, -0.573f, -0.202f, 0.545f, 0.619f, 0.246f, - 0.000f, 0.000f, 0.000f, 0.503f, 0.659f, 0.117f, -0.474f, -0.573f, -0.202f, 0.545f, - 0.619f, 0.246f, 0.000f, 0.000f}; - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphGenericTests, ExecuteInLineGRN) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 2 - 2 - - - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - 1 - 3 - 2 - 2 - - - - - 1 - 6 - 2 - 2 - - - - - - - - - - -)V0G0N"; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork 
network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {1, 3, 2, 2}; - - InferenceEngine::Blob::Ptr src1 = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src1->allocate(); - fill_data(src1->buffer(), src1->size()); - - InferenceEngine::Blob::Ptr src2 = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src2->allocate(); - fill_data(src2->buffer(), src2->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data1", src1)); - srcs.insert(std::pair("data2", src2)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - std::vector refDst = {0.000f, 0.503f, 0.659f, 0.117f, -0.474f, -0.573f, -0.202f, 0.545f, 0.619f, 0.246f, - 0.000f, 0.000f, 0.000f, 0.503f, 0.659f, 0.117f, -0.474f, -0.573f, -0.202f, 0.545f, - 0.619f, 0.246f, 0.000f, 0.000f}; - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); -} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/log_softmax_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/log_softmax_tests.cpp deleted file mode 100644 index bd9978dcc72..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/log_softmax_tests.cpp +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct log_softmax_test_params { - InferenceEngine::SizeVector in_out; - std::vector src; - int axis; - std::vector reference; - - std::vector> comp; -}; - -void ref_log_softmax(InferenceEngine::TBlob &src, int axis, InferenceEngine::TBlob &dst) { - float *src_data = src.data(); - float *dst_data = dst.data(); - InferenceEngine::SizeVector dims = src.getTensorDesc().getDims(); - - if (axis < 0) axis += dims.size(); - - size_t W = dims[3]; - size_t H = dims[2]; - size_t C = dims[1]; - size_t MB = dims[0]; - - auto off = [=](int n, int c, int h, int w) - { - return (n * W * H * C + c * W * H + h * W + w); - }; - - if(axis == 0) { - for (int c = 0; c < C; ++c) { - for (int h = 0; h < H; ++h) { - for (int w = 0; w < W; ++w) { - float result = 0.0f; - for (int n = 0; n < MB; ++n) { - result += expf(src_data[off(n, c, h, w)]); - } - result = logf(result); - for (int n = 0; n < MB; ++n) { - dst_data[off(n, c, h, w)] = src_data[off(n, c, h, w)] - result; - } - } - } - } - } else if(axis == 1) { - for (int n = 0; n < MB; ++n) { - for (int h = 0; h < H; ++h) { - for (int w = 0; w < W; ++w) { - float result = 0.0f; - for (int c = 0; c < C; ++c) { - result += expf(src_data[off(n, c, h, w)]); - } - result = logf(result); - for (int c = 0; c < C; ++c) { - dst_data[off(n, c, h, w)] = src_data[off(n, c, h, w)] - result; - } - } - } - } - } else if(axis == 2) { - for (int 
n = 0; n < MB; ++n) { - for (int c = 0; c < C; ++c) { - for (int w = 0; w < W; ++w) { - float result = 0.0f; - for (int h = 0; h < H; ++h) { - result += expf(src_data[off(n, c, h, w)]); - } - result = logf(result); - for (int h = 0; h < H; ++h) { - dst_data[off(n, c, h, w)] = src_data[off(n, c, h, w)] - result; - } - } - } - } - } else if(axis == 3) { - for (int n = 0; n < MB; ++n) { - for (int c = 0; c < C; ++c) { - for (int h = 0; h < H; ++h) { - float result = 0.0f; - for (int w = 0; w < W; ++w) { - result += expf(src_data[off(n, c, h, w)]); - } - result = logf(result); - for (int w = 0; w < W; ++w) { - dst_data[off(n, c, h, w)] = src_data[off(n, c, h, w)] - result; - } - } - } - } - } -} - -void ref_log_softmax_any_dims(InferenceEngine::TBlob &src, int axis, InferenceEngine::TBlob &dst) { - size_t i, j, k, axis_step = 1, reduced_axis_size, reduced_axis_stride = 1; - InferenceEngine::SizeVector dims = src.getTensorDesc().getDims(); - float *src_data = src.data(); - float *dst_data = dst.data(); - - if (axis < 0) axis += dims.size(); - for (i = 0; i < axis; i++) axis_step *= dims[i]; - reduced_axis_size = dims[axis]; - for (i = (axis + 1); i < dims.size(); i++) reduced_axis_stride *= dims[i]; - - for (k = 0; k < axis_step; k++) { - for (i = 0; i < reduced_axis_stride; i++) { - float reduce_prod = 0.0f; - const float *src_dataPtr = &src_data[k * reduced_axis_stride * reduced_axis_size + i]; - for (j = 0; j < reduced_axis_size; ++j) { - reduce_prod += expf((*src_dataPtr)); - src_dataPtr += reduced_axis_stride; - } - - reduce_prod = logf(reduce_prod); - src_dataPtr = &src_data[k * reduced_axis_stride * reduced_axis_size + i]; - float *dst_dataPtr = (float*)&dst_data[k * reduced_axis_stride * reduced_axis_size + i]; - for (j = 0; j < reduced_axis_size; ++j) { - (*dst_dataPtr) = (*src_dataPtr) - reduce_prod; - src_dataPtr += reduced_axis_stride; - dst_dataPtr += reduced_axis_stride; - } - } - } -} - -class MKLDNNCPUExtLogSoftmaxTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_OUT_ - - - - - - - - _IN_OUT_ - - - - - _IN_OUT_ - - - - - - - - -)V0G0N"; - - std::string getModel(log_softmax_test_params p) { - std::string model = model_t; - std::string in_out; - - for (auto& dst : p.in_out) { - in_out += ""; - in_out += std::to_string(dst) + "\n"; - } - - REPLACE_WITH_STR(model, "_IN_OUT_", in_out); - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - log_softmax_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Input Data - InferenceEngine::Blob::Ptr srcData = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.in_out, InferenceEngine::TensorDesc::getLayoutByDims(p.in_out) }); - srcData->allocate(); - if (p.src.size()) - memcpy(srcData->buffer(), &p.src[0], sizeof(float)*p.src.size()); - else - fill_data(srcData->buffer(), srcData->size()); - auto * srcDataPtr = dynamic_cast*>(srcData.get()); - if (srcDataPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - 
InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - // Check results - if (p.in_out.size() == 4) { - ref_log_softmax(*srcDataPtr, p.axis, dst_ref); - if (p.reference.size()) { - for (size_t i = 0; i < p.reference.size(); i++) { - ASSERT_NEAR(dst_ref.data()[i], p.reference[i], 0.00001f); - } - } - } - ref_log_softmax_any_dims(*srcDataPtr, p.axis, dst_ref); - if (p.reference.size()) { - for (size_t i = 0; i < p.reference.size(); i++) { - ASSERT_NEAR(dst_ref.data()[i], p.reference[i], 0.00001f); - } - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("Input", srcData)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref, 0.00001f); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtLogSoftmaxTests, TestsLogSoftmax) {} - -INSTANTIATE_TEST_CASE_P( - TestsLogSoftmax, MKLDNNCPUExtLogSoftmaxTests, - ::testing::Values( - // Params: in_out, src, axis, reference - log_softmax_test_params{ { 1, 1, 1, 3 },{ -0.5f, 0.f, 0.5f },3,{ -1.68026966f, -1.1802697f, -0.68026966 } }, - log_softmax_test_params{ { 1, 1, 1, 3 },{ -0.5f, 0.f, 0.5f },-1,{ -1.68026966f, -1.1802697f, -0.68026966 } }, - log_softmax_test_params{ { 3, 1, 1, 1 },{ -0.5f, 0.f, 0.5f },0,{ -1.68026966f, -1.1802697f, -0.68026966 } }, - log_softmax_test_params{ { 1, 1, 2, 2 },{ 1.0f, 0.5f, 0.f, -0.5f },3,{ -0.474077f, -0.974077f, -0.474077f, -0.974077f } }, - log_softmax_test_params{ { 2, 2, 1, 1 },{ 1.0f, 0.5f, 0.f, -0.5f },1,{ -0.474077f, -0.974077f, -0.474077f, -0.974077f } }, - log_softmax_test_params{ { 2, 2, 1, 1 },{ 1.0f, 0.5f, 0.f, -0.5f },-3,{ -0.474077f, -0.974077f, -0.474077f, -0.974077f } }, - log_softmax_test_params{ { 2, 3, 3, 2 },{ },3,{ } }, - log_softmax_test_params{ { 1, 1, 2, 2 },{ 1.0f, 0.5f, 0.f, -0.5f },2,{ -0.31326166f, -0.31326166f, -1.3132616f, -1.3132616f } }, - log_softmax_test_params{ { 2, 3, 3, 2 },{},0,{} }, - log_softmax_test_params{ { 2, 3, 3, 2 },{},1,{} }, - log_softmax_test_params{ { 2, 3, 3, 2 },{},2,{} }, - log_softmax_test_params{ { 2, 3, 3, 2, 4, 5, 1, 2 },{},4,{} } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp deleted file mode 100644 index 573ed839dc9..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" -#include "common_test_utils/data_utils.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct math_test_params { - std::string math_function; - InferenceEngine::SizeVector in_out; - std::vector input_tensor; - std::vector alpha; - std::vector beta; - std::vector gamma; - std::vector reference; - - std::vector> comp; -}; - -void ref_math( - std::string math_function, - InferenceEngine::TBlob &src, - std::vector alpha, - std::vector beta, - std::vector gamma, - InferenceEngine::TBlob &dst -) { - size_t i; - float* src_data = src.data(); - float *dst_data = dst.data(); - size_t dst_size = dst.size(); - - if 
(math_function == "Erf") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = std::erf(src_data[i]); - } - } else if (math_function == "Abs") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = (std::abs)(src_data[i]); - } - } else if (math_function == "Acos") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = acosf(src_data[i]); - } - } else if (math_function == "Acosh") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = acoshf(src_data[i]); - } - } else if (math_function == "Asin") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = asinf(src_data[i]); - } - } else if (math_function == "Asinh") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = asinhf(src_data[i]); - } - } else if (math_function == "Atan") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = atanf(src_data[i]); - } - } else if (math_function == "Atanh") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = atanhf(src_data[i]); - } - } else if (math_function == "Ceil") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = ceilf(src_data[i]); - } - } else if (math_function == "Cos") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = cosf(src_data[i]); - } - } else if (math_function == "Cosh") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = coshf(src_data[i]); - } - } else if (math_function == "Floor") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = floorf(src_data[i]); - } - } else if (math_function == "HardSigmoid") { - alpha[0] = (alpha[0] == 0.0f) ? 0.2f : alpha[0]; - beta[0] = (beta[0] == 0.0f) ? 0.5f : beta[0]; - for (i = 0; i < dst_size; i++) { - dst_data[i] = (std::max)(0.f, (std::min)(1.f, alpha[0] * src_data[i] + beta[0])); - } - } else if (math_function == "Log") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = logf(src_data[i]); - } - } else if (math_function == "Neg") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = -src_data[i]; - } - } else if (math_function == "Reciprocal") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = 1.0f / src_data[i]; - } - } else if (math_function == "Selu") { - alpha[0] = (alpha[0] == 0.0f) ? 1.67326f : alpha[0]; - gamma[0] = (gamma[0] == 0.0f) ? 1.0507f : gamma[0]; - for (i = 0; i < dst_size; i++) { - float x = src_data[i]; - dst_data[i] = (x > 0.0f) ? 
(gamma[0] * x) : (gamma[0] * alpha[0] * (exp(x) - 1.0f)); - } - } else if (math_function == "Sign") { - for (i = 0; i < dst_size; i++) { - if (src_data[i] > 0.0f) dst_data[i] = 1.0f; - else if (src_data[i] < 0.0f) dst_data[i] = -1.0f; - else dst_data[i] = 0.0f; - } - } else if (math_function == "Sin") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = sinf(src_data[i]); - } - } else if (math_function == "Sinh") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = sinhf(src_data[i]); - } - } else if (math_function == "SoftPlus") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = logf(expf(src_data[i]) + 1); - } - } else if (math_function == "Softsign") { - for (i = 0; i < dst_size; i++) { - float x = src_data[i]; - dst_data[i] = x / (1.f + (std::abs)(x)); - } - } else if (math_function == "Tan") { - for (i = 0; i < dst_size; i++) { - dst_data[i] = tanf(src_data[i]); - } - } -} - -class MKLDNNCPUExtMathTests: public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_OUT_ - - - - - - - - _IN_OUT_ - - - - - _IN_OUT_ - - - - - - - - -)V0G0N"; - - std::string getModel(math_test_params p) { - std::string model = model_t; - std::string in_out = ""; - std::string alpha; - std::string beta; - std::string gamma; - - for (auto& dst : p.in_out) { - in_out += ""; - in_out += std::to_string(dst) + "\n"; - } - - REPLACE_WITH_STR(model, "_IN_OUT_", in_out); - REPLACE_WITH_STR(model, "_MATH_FUNCTION_", p.math_function); - - if (p.alpha.size()) { - alpha = "alpha=\"" + to_string_c_locale(p.alpha[0]) + "\""; - } - REPLACE_WITH_STR(model, "_ALPHA_", alpha); - - if (p.beta.size()) { - beta = "beta=\"" + to_string_c_locale(p.beta[0]) + "\""; - } - REPLACE_WITH_STR(model, "_BETA_", beta); - - if (p.gamma.size()) { - gamma = "gamma=\"" + to_string_c_locale(p.gamma[0]) + "\""; - } - REPLACE_WITH_STR(model, "_GAMMA_", gamma); - return model; - } - - template - static void fill_data_dbgval(data_t *data, size_t size) { - for (size_t i = 0; i < size; i++) { - data[i] = static_cast(i & (sizeof(data_t) * 8 - 1)); - } - } -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - math_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Input Data - InferenceEngine::Blob::Ptr srcData = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.in_out, InferenceEngine::TensorDesc::getLayoutByDims(p.in_out) }); - srcData->allocate(); - if (p.input_tensor.size()) - memcpy(srcData->buffer(), &p.input_tensor[0], sizeof(float)*p.input_tensor.size()); - else { - if (p.math_function == "Erf") - CommonTestUtils::fill_data_sine(srcData->buffer(), srcData->size(), 0.f, 3.f, 1.f); - else - CommonTestUtils::fill_data(srcData->buffer(), srcData->size()); - } - auto * srcDataPtr = dynamic_cast*>(srcData.get()); - if (srcDataPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob 
dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - // Check results - ref_math(p.math_function, *srcDataPtr, p.alpha, p.beta, p.gamma, dst_ref); - if (p.reference.size()) { - for (size_t i = 0; i < p.reference.size(); i++) { - ASSERT_NEAR(dst_ref.data()[i], p.reference[i], 0.00001f); - } - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("Input", srcData)); - - // Infer - graph.Infer(srcs, outputBlobs); - float threshold = p.math_function == "Erf" ? 0.0001f : 0.00001f; - compare(*output, dst_ref, threshold); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtMathTests, TestsMath) {} - -INSTANTIATE_TEST_CASE_P( - TestsMath, MKLDNNCPUExtMathTests, - ::testing::Values( - // Params: math_function, in_out, input_tensor, alpha, beta, gamma, reference - math_test_params{ "Erf", {},{},{},{},{},{} }, - math_test_params{ "Erf", { 1, 1, 12, 256 }, {},{},{},{}, {} }, - math_test_params{ "Erf", { 12, 256, 3 },{},{},{},{},{} }, - math_test_params{ "Erf", { 3, 4 },{},{},{},{},{} }, - math_test_params{ "Erf", { 20 },{},{},{},{},{} }, - math_test_params{ "Erf", { 12, 4, 9, 8 },{},{},{},{},{} }, - math_test_params{ "Erf", { 6, 12, 4, 9, 8, 10, 3 },{},{},{},{},{} }, - math_test_params{ "Abs",{ 3 },{ -1, 0, 1 },{},{},{},{ 1, 0, 1 } }, - math_test_params{ "Acos",{ 3 },{ -0.5f, 0.f, 0.5f },{},{},{},{ 2.09439516f, 1.57079637f, 1.04719758f } }, - math_test_params{ "Acosh",{ 3 },{ 1.f, 2.0f, 3.0f },{},{},{},{} }, - math_test_params{ "Asin",{ 3 },{ -0.5f, 0.f, 0.5f },{},{},{},{ -0.523598790f, 0.0f, 0.523598790f } }, - math_test_params{ "Asinh",{ 3 },{ -0.5f, 0.f, 0.5f },{},{},{},{ } }, - math_test_params{ "Atan",{ 3 },{ -1, 0, 1 },{},{},{},{ -0.785398185f, 0.0f, 0.785398185f } }, - math_test_params{ "Atanh",{ 3 },{ -0.5f, 0.f, 0.5f },{},{},{},{ } }, - math_test_params{ "Ceil",{ 2 },{ -1.5f, 1.2f },{},{},{},{ -1, 2 } }, - math_test_params{ "Cos",{ 3 },{ -1, 0, 1 },{},{},{},{ 0.540302336f, 1.0f, 0.540302336f } }, - math_test_params{ "Cosh",{ 3 },{ -0.5f, 0.f, 0.5f },{},{},{},{ } }, - math_test_params{ "Floor",{ 3 },{-1.5f, 1.2f, 2.f},{},{},{},{-2, 1, 2} }, - math_test_params{ "HardSigmoid",{ 3 },{ -1, 0, 1 },{0.5f},{0.6f},{},{ 0.1f, 0.6f, 1.f } }, - math_test_params{ "Log",{ 2 },{ 1, 10 },{},{},{},{ 0.f, 2.30258512f } }, - math_test_params{ "Neg",{ 3 },{ -1, 0, 1 },{},{},{},{ 1, 0, -1 } }, - math_test_params{ "Reciprocal",{ 3 },{ -1, 0.1, 1 },{2},{},{3},{-1, 10, 1} }, - math_test_params{ "Selu",{ 3 },{ -1, 0, 1 },{2},{},{3},{ -3.79272318f, 0.f, 3.f } }, - math_test_params{ "Sign",{ 3 },{ -0.5f, 0.f, 0.5f },{},{},{},{-1, 0, 1} }, - math_test_params{ "Sin",{ 3 },{ -1, 0, 1 },{},{},{},{ -0.841470957f, 0.0f, 0.841470957f } }, - math_test_params{ "Sinh",{ 3 },{ -0.5f, 0.f, 0.5f },{},{},{},{ } }, - math_test_params{ "SoftPlus",{ 3 },{ -1, 0, 1 },{},{},{},{ 0.31326166f, 0.69314718f, 1.31326163f } }, - math_test_params{ "Softsign",{ 3 },{ -1, 0, 1 },{},{},{},{ -0.5f, 0.f, 0.5f } }, - math_test_params{ "Tan",{ 3 },{ -1, 0, 1 },{},{},{},{ -1.55740774f, 0.0f, 1.55740774f } } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp deleted file mode 100644 index 7e1179fbf5e..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp +++ /dev/null @@ -1,646 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - 
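// The MVN reference deleted below normalizes each batch volume (and, when
// across_channels == 0, each channel separately) as
//   y = (x - mean) / sqrt(mean((x - mean)^2) + eps),
// applying the second step only when normalize_variance is set.
// A minimal sketch of that formula over one flattened slice (illustrative only;
// the helper name is not from the deleted file):
#include <cmath>
#include <cstddef>

inline void mvn_slice(float* data, std::size_t n, float eps, bool normalize_variance) {
    if (n == 0)
        return;
    float mean = 0.f;
    for (std::size_t i = 0; i < n; ++i)
        mean += data[i];
    mean /= static_cast<float>(n);
    for (std::size_t i = 0; i < n; ++i)
        data[i] -= mean;                      // mean subtraction always happens
    if (normalize_variance) {
        float mean_sq = 0.f;
        for (std::size_t i = 0; i < n; ++i)
            mean_sq += data[i] * data[i];
        mean_sq /= static_cast<float>(n);
        const float inv_norm = 1.f / std::sqrt(mean_sq + eps);
        for (std::size_t i = 0; i < n; ++i)
            data[i] *= inv_norm;              // variance normalization is optional
    }
}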
-#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include "ir_gen_helper.hpp" -#include - -#include -#include - -using namespace InferenceEngine; -using namespace ::testing; -using namespace std; -using namespace single_layer_tests; - -using namespace Extensions; -using namespace ::Cpu; - -namespace { - -OV_CC_DOMAINS(MVNTests); - -} // namespace - -struct mvn_test_params { - vector dims; - - int across_channels; - int normalize_variance; - float eps; - - size_t num_prim_desc; - bool isBlockedFormat; - int selectedType; - - Precision prec_in; - Precision prec_out; - - vector> comp; -}; - -extern InferenceEngine::IExtensionPtr make_FakeExtensions(); - -template -void ref_mvn(const TBlob &src, TBlob &dst, mvn_test_params prm) { - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - size_t dims_size = prm.dims.size(); - - size_t N = prm.dims[0]; - size_t C = prm.dims[1]; - size_t D = dims_size > 4 ? prm.dims[dims_size - 3lu] : 1lu; - size_t H = dims_size > 3 ? prm.dims[dims_size - 2lu] : 1lu; - size_t W = prm.dims[dims_size - 1lu]; - - float eps = prm.eps; - - size_t C1 = H * W; - size_t C2 = C1 * D; - size_t C3 = C2 * C; - - float C2inv = 1.f / static_cast(C2); - float C3inv = 1.f / static_cast(C3); - - for (size_t b = 0lu; b < N; b++) { - size_t cb = b * C3; - // Calculate mean value - if (prm.across_channels) { - float mean = 0.0f; - for (size_t c = 0lu; c < C; c++) { - size_t cc = cb + c * C2; - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * W; - for (size_t w = 0lu; w < W; w++) { - mean += src_data[ch + w]; - } - } - } - } - mean *= C3inv; - for (size_t c = 0lu; c < C; c++) { - size_t cc = cb + c * C2; - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * W; - for (size_t w = 0lu; w < W; w++) { - size_t index = ch + w; - dst_data[index] = src_data[index] - mean; - } - } - } - } - } else { - for (size_t c = 0lu; c < C; c++) { - size_t cc = cb + c * C2; - float mean = 0.0f; - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * W; - for (size_t w = 0lu; w < W; w++) { - mean += src_data[ch + w]; - } - } - } - - mean *= C2inv; - - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * W; - for (size_t w = 0lu; w < W; w++) { - size_t index = ch + w; - dst_data[index] = src_data[index] - mean; - } - } - } - } - } - } - - if (prm.normalize_variance) { - for (size_t b = 0; b < N; b++) { - size_t cb = b * C3; - // Calculate variances value - if (prm.across_channels) { - float variance = 0.f; - for (size_t c = 0lu; c < C; c++) { - size_t cc = cb + c * C2; - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * W; - for (size_t w = 0lu; w < W; w++) { - variance += dst_data[ch + w] * dst_data[ch + w]; - } - } - } - } - variance = 1.f / sqrtf(variance * C3inv + eps); - for (size_t c = 0lu; c < C; c++) { - size_t cc = cb + c * C2; - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * W; - for (size_t w = 0lu; w < W; w++) { - dst_data[ch + w] *= variance; - } - } - } - } - } else { - for (size_t c = 0lu; c < C; c++) { - size_t cc = cb + c * C2; - float variance = 0.0f; - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + 
d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * W; - for (size_t w = 0lu; w < W; w++) { - variance += dst_data[ch + w] * dst_data[ch + w]; - } - } - } - variance = 1.f / sqrtf(variance * C2inv + eps); - for (size_t d = 0lu; d < D; d++) { - size_t cd = cc + d * C1; - for (size_t h = 0lu; h < H; h++) { - size_t ch = cd + h * W; - for (size_t w = 0lu; w < W; w++) { - dst_data[ch + w] *= variance; - if (prm.prec_out == Precision::U8) { - dst_data[ch + w] = (dst_data[ch + w] > 0) ? roundf(dst_data[ch + w]) : 0; - } else if (prm.prec_out == Precision::I8) { - dst_data[ch + w] = roundf(dst_data[ch + w]); - } - } - } - } - } - } - } - } -} - -class MKLDNNCPUExtMVNTests: public TestsCommon, public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - __SRC_DIMS__ - - - - - __SRC_DIMS__ - - - - - - - - __SRC_DIMS__ - - - - - __SRC_DIMS__ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - - -)V0G0N"; - - std::string getModel(mvn_test_params p) { - std::string model = layers_t; - if (p.isBlockedFormat) - REPLACE_WITH_STR(model, "_FL_", "FakeLayerBLK"); - else - REPLACE_WITH_STR(model, "_FL_", "FakeLayerPLN"); - - std::string s_dims; - for (auto& dim : p.dims) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", s_dims); - - REPLACE_WITH_NUM(model, "_AC_", p.across_channels); - REPLACE_WITH_NUM(model, "_NV_", p.normalize_variance); - REPLACE_WITH_NUM(model, "_EPS_", p.eps); - - model = IRTemplateGenerator::getIRTemplate("MVN_Only", p.dims, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - mvn_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - auto defaultExtensions = std::make_shared(); - extMgr->AddExtension(defaultExtensions); - extMgr->AddExtension(make_FakeExtensions()); - - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "mvn") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - SizeVector dims_src = p.dims; - - Layout layout = ANY; - switch (p.dims.size()) { - case 4: - layout = NCHW; - break; - case 5: - layout = NCDHW; - break; - } - - Blob::Ptr src = make_shared_blob({ Precision::FP32, dims_src, layout }); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - OutputsDataMap out; - out = network.getOutputsInfo(); - BlobMap outputBlobs; - - std::pair item = *out.begin(); - - TBlob::Ptr output; - output = make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - TBlob 
dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_mvn(*srcPtr, dst_ref, p); - compare(*output, dst_ref, 0.0001f); - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtMVNTests, TestsMVN) {} - -INSTANTIATE_TEST_CASE_P( - TestsMVN, MKLDNNCPUExtMVNTests, - ::testing::Values( - /*0*/ mvn_test_params{{2, 64, 15, 15}, 0, 0, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 0, 0, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 1, 0, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 1, 0, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 1, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 1, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 0, 0, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - /*9*/ mvn_test_params{{2, 2, 33, 65}, 0, 0, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 15, 15}, 1, 0, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 1, 0, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - /*14*/ mvn_test_params{{2,640, 15, 15}, 1, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 2, 33, 65}, 1, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - - // 5D - /*16*/ mvn_test_params{{2, 64, 24, 32, 40}, 0, 0, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 1, 0, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 1, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 0, 0, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 0, 1, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{2, 64, 24, 32, 40}, 1, 0, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - /*23*/ mvn_test_params{{2, 64, 24, 32, 40}, 1, 1, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown }, - mvn_test_params{{1, 64, 32, 32, 32}, 0, 1, 0.001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown } - )); - -static std::string precToStr (Precision prec) { - return prec == Precision::U8 ? "U8" : prec == Precision::I8 ? 
"I8" : "FP32"; -} - -template -static void fill_int_data(data_t *data, int size, bool is_signed) { - for (int i = 0 ; i < size; i++) { - data[i] = i * 13 % 21 - 10 * is_signed; - } -} - -class FakeLayerImpl_MVN: public Cpu::ExtLayerBase, - public WithParamInterface { -public: - explicit FakeLayerImpl_MVN(const CNNLayer* layer) { - try { - is_blocked = layer->GetParamAsBool("is_blocked"); - addConfig(layer); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - bool is_blocked; - - void addConfig(const CNNLayer* layer) { - LayerConfig config; - - // Fill tensor parameters into config - auto fill_port = [&] (std::vector& port, const DataPtr& data) { - auto div_up = [](const int a, const int b) -> int { - if (!b) - return 0; - return (a + b - 1) / b; - }; - if (!data) IE_THROW() << "Cannot get input data!"; - - DataConfig dataConfig; - dataConfig.inPlace = 0; - dataConfig.constant = false; - - const TensorDesc& data_desc = data->getTensorDesc(); - const SizeVector& data_dims = data_desc.getDims(); - - InferenceEngine::Precision precision = data_desc.getPrecision(); - Layout layout; - if (is_blocked) { - int blk_size = InferenceEngine::with_cpu_x86_avx512f() ? 16 : 8; - - std::vector blocks = data_dims; - std::vector order(blocks.size()); - for (size_t i = 0; i < order.size(); i++) order[i] = i; - - order.push_back(1); - blocks[1] = div_up(blocks[1], blk_size); - blocks.push_back(blk_size); - - dataConfig.desc = TensorDesc(precision, data_dims, {blocks, order}); - } else { - dataConfig.desc = TensorDesc(precision, data_dims, data_dims.size() == 5 ? NDHWC : NHWC); - } - - port.push_back(dataConfig); - }; - - fill_port(config.inConfs, layer->insData[0].lock()); - fill_port(config.outConfs, layer->outData[0]); - config.inConfs[0].desc.setPrecision(config.outConfs[0].desc.getPrecision()); - confs.push_back(config); - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - return OK; - } -}; - -class MKLDNNCPUExtMVNTests_Blocked: public TestsCommon, public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - __SRC_DIMS__ - - - - - __SRC_DIMS__ - - - - - - - - __SRC_DIMS__ - - - - - __SRC_DIMS__ - - - - - - - - __SRC_DIMS__ - - - - - __SRC_DIMS__ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - - - -)V0G0N"; - - std::string getModel(mvn_test_params p) { - std::string model = layers_t; - - std::string s_dims; - for (auto& dim : p.dims) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", s_dims); - - REPLACE_WITH_NUM(model, "_AC_", p.across_channels); - REPLACE_WITH_NUM(model, "_NV_", p.normalize_variance); - REPLACE_WITH_NUM(model, "_EPS_", p.eps); - REPLACE_WITH_STR(model, "_PREC_IN_", precToStr(p.prec_in)); - REPLACE_WITH_STR(model, "_PREC_OUT_", precToStr(p.prec_out)); - REPLACE_WITH_NUM(model, "_IS_BLOCKED_", p.isBlockedFormat); - - model = IRTemplateGenerator::getIRTemplate("MVN_Only", p.dims, "FP32", model, edges_t, 7); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - mvn_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - auto manager = std::make_shared(); - { - auto defaultExt = std::make_shared(); - 
defaultExt->layersFactory.registerNodeIfRequired(MVNTests, FakeLayer_MVN, "FakeLayer_MVN", Cpu::ImplFactory); - manager->AddExtension(defaultExt); - } - graph.CreateGraph(network, manager); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "mvn") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - SizeVector dims_src = p.dims; - - Layout layout = ANY; - switch (p.dims.size()) { - case 4: - layout = NCHW; - break; - case 5: - layout = NCDHW; - break; - } - - Blob::Ptr src = make_shared_blob({ Precision::FP32, dims_src, layout }); - src->allocate(); - if (p.prec_in == Precision::U8) { - fill_int_data(src->buffer().as(), src->size(), false); - } else if (p.prec_in == Precision::I8) { - fill_int_data(src->buffer().as(), src->size(), true); - } else { - fill_data(src->buffer(), src->size()); - } - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - OutputsDataMap out; - out = network.getOutputsInfo(); - BlobMap outputBlobs; - - std::pair item = *out.begin(); - - TBlob::Ptr output; - output = make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_mvn(*srcPtr, dst_ref, p); - compare(*output, dst_ref, 0.0001f); - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtMVNTests_Blocked, TestsMVN) {} - -INSTANTIATE_TEST_CASE_P( - TestsMVN, MKLDNNCPUExtMVNTests_Blocked, - ::testing::Values( - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - /*4*/ // mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - /*7*/ // mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - mvn_test_params{{2, 
64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - /*13*/ // mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - /*16*/ // mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - mvn_test_params{{2, 64, 8, 8, 8}, 0, 1, 0.00001f, 3, false, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - - mvn_test_params{{2, 64, 15, 15}, 0, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - mvn_test_params{{2, 2, 33, 65}, 0, 1, 0.00001, 3, true, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - mvn_test_params{{2, 64, 24, 32, 40}, 0, 1, 0.00001f, 3, true, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp deleted file mode 100644 index 1b7972eff63..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp +++ /dev/null @@ -1,568 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct nmsTF_test_params { - int center_point_box; - int sort_result_descending; - InferenceEngine::SizeVector scoresDim; - std::vector boxes; - std::vector scores; - std::vector max_output_boxes_per_class; - std::vector iou_threshold; - std::vector score_threshold; - - int num_selected_indices; - std::vector ref; - - std::vector> comp; -}; - -static float intersectionOverUnion(float* boxesI, float* boxesJ, bool center_point_box) { - float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; - if (center_point_box) { - // box format: x_center, y_center, width, height - yminI = boxesI[1] - boxesI[3] / 2.f; - xminI = boxesI[0] - boxesI[2] / 2.f; - ymaxI = boxesI[1] + boxesI[3] / 2.f; - xmaxI = boxesI[0] + boxesI[2] / 2.f; - yminJ = boxesJ[1] - boxesJ[3] / 2.f; - xminJ = boxesJ[0] - boxesJ[2] / 2.f; - ymaxJ = boxesJ[1] + boxesJ[3] / 2.f; - xmaxJ = boxesJ[0] + boxesJ[2] / 2.f; - } else { - // box format: y1, x1, y2, x2 - yminI = (std::min)(boxesI[0], boxesI[2]); - xminI = (std::min)(boxesI[1], boxesI[3]); - ymaxI = (std::max)(boxesI[0], boxesI[2]); - xmaxI = (std::max)(boxesI[1], 
boxesI[3]); - yminJ = (std::min)(boxesJ[0], boxesJ[2]); - xminJ = (std::min)(boxesJ[1], boxesJ[3]); - ymaxJ = (std::max)(boxesJ[0], boxesJ[2]); - xmaxJ = (std::max)(boxesJ[1], boxesJ[3]); - } - - float areaI = (ymaxI - yminI) * (xmaxI - xminI); - float areaJ = (ymaxJ - yminJ) * (xmaxJ - xminJ); - if (areaI <= 0.f || areaJ <= 0.f) - return 0.f; - - float intersection_area = - (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ), 0.f) * - (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ), 0.f); - return intersection_area / (areaI + areaJ - intersection_area); -} - -typedef struct { - float score; - int batch_index; - int class_index; - int box_index; -} filteredBoxes; - -static void ref_nms( - InferenceEngine::TBlob &srcBoxes, - InferenceEngine::TBlob &srcScores, - InferenceEngine::TBlob &selected_idxs, - nmsTF_test_params p -) { - float *boxes = srcBoxes.data(); - float *scores = srcScores.data(); - - InferenceEngine::SizeVector scores_dims = srcScores.getTensorDesc().getDims(); - int num_boxes = static_cast(scores_dims[2]); - int max_output_boxes_per_class = num_boxes; - if (p.max_output_boxes_per_class.size()) - max_output_boxes_per_class = (std::min)(max_output_boxes_per_class, p.max_output_boxes_per_class[0]); - - float iou_threshold = 1.f; // Value range [0, 1] - if (p.iou_threshold.size()) - iou_threshold = (std::min)(iou_threshold, p.iou_threshold[0]); - - float score_threshold = 0.f; - if (p.score_threshold.size()) - score_threshold = p.score_threshold[0]; - - int* selected_indices = selected_idxs.data(); - InferenceEngine::SizeVector selected_indices_dims = selected_idxs.getTensorDesc().getDims(); - - InferenceEngine::SizeVector boxesStrides = srcBoxes.getTensorDesc().getBlockingDesc().getStrides(); - InferenceEngine::SizeVector scoresStrides = srcScores.getTensorDesc().getBlockingDesc().getStrides(); - - // boxes shape: {num_batches, num_boxes, 4} - // scores shape: {num_batches, num_classes, num_boxes} - int num_batches = static_cast(scores_dims[0]); - int num_classes = static_cast(scores_dims[1]); - std::vector fb; - - for (int batch = 0; batch < num_batches; batch++) { - float *boxesPtr = boxes + batch * boxesStrides[0]; - for (int class_idx = 0; class_idx < num_classes; class_idx++) { - float *scoresPtr = scores + batch * scoresStrides[0] + class_idx * scoresStrides[1]; - std::vector > scores_vector; - for (int box_idx = 0; box_idx < num_boxes; box_idx++) { - if (scoresPtr[box_idx] > score_threshold) - scores_vector.push_back(std::make_pair(scoresPtr[box_idx], box_idx)); - } - - if (scores_vector.size()) { - std::sort(scores_vector.begin(), scores_vector.end(), - [](const std::pair& l, const std::pair& r) { return l.first > r.first; }); - - int io_selection_size = 1; - fb.push_back({ scores_vector[0].first, batch, class_idx, scores_vector[0].second }); - for (int box_idx = 1; (box_idx < static_cast(scores_vector.size()) && io_selection_size < max_output_boxes_per_class); box_idx++) { - bool box_is_selected = true; - for (int idx = io_selection_size - 1; idx >= 0; idx--) { - float iou = intersectionOverUnion(&boxesPtr[scores_vector[box_idx].second * 4], - &boxesPtr[scores_vector[idx].second * 4], (p.center_point_box == 1)); - if (iou > iou_threshold) { - box_is_selected = false; - break; - } - } - - if (box_is_selected) { - scores_vector[io_selection_size] = scores_vector[box_idx]; - io_selection_size++; - fb.push_back({ scores_vector[box_idx].first, batch, class_idx, scores_vector[box_idx].second }); - } - } - } - } - } - - if(p.sort_result_descending) - 
std::sort(fb.begin(), fb.end(), [](const filteredBoxes& l, const filteredBoxes& r) { return l.score > r.score; }); - int selected_indicesStride = selected_idxs.getTensorDesc().getBlockingDesc().getStrides()[0]; - int* selected_indicesPtr = selected_indices; - size_t idx; - for (idx = 0; idx < (std::min)(selected_indices_dims[0], fb.size()); idx++) { - selected_indicesPtr[0] = fb[idx].batch_index; - selected_indicesPtr[1] = fb[idx].class_index; - selected_indicesPtr[2] = fb[idx].box_index; - selected_indicesPtr += selected_indicesStride; - } - for (; idx < selected_indices_dims[0]; idx++) { - selected_indicesPtr[0] = -1; - selected_indicesPtr[1] = -1; - selected_indicesPtr[2] = -1; - selected_indicesPtr += selected_indicesStride; - } -} - -class MKLDNNCPUExtNonMaxSuppressionTFTests : public TestsCommon, public WithParamInterface { - std::string model_t2 = R"V0G0N( - - - - - - _IBOXES_ - - - - - - - _ISCORES_ - - - - - - - - _IBOXES_ - - - _ISCORES_ - - - - - _IOUT_ - - - - - - - - - -)V0G0N"; - - std::string model_t3 = R"V0G0N( - - - - - - _IBOXES_ - - - - - - - _ISCORES_ - - - - - - - - - - - - - _IBOXES_ - - - _ISCORES_ - - - - - - _IOUT_ - - - - - - - - - - -)V0G0N"; - std::string model_t4 = R"V0G0N( - - - - - - _IBOXES_ - - - - - - - _ISCORES_ - - - - - - - - - - - - - - - - - - _IBOXES_ - - - _ISCORES_ - - - - - - - _IOUT_ - - - - - - - - - - - -)V0G0N"; - - std::string model_t5 = R"V0G0N( - - - - - - _IBOXES_ - - - - - - - _ISCORES_ - - - - - - - - - - - - - - - - - - - - - - - _IBOXES_ - - - _ISCORES_ - - - - - - - - _IOUT_ - - - - - - - - - - - - -)V0G0N"; - - std::string getModel(nmsTF_test_params p) { - std::string model; - if (!p.max_output_boxes_per_class.size()) - model = model_t2; - else if (!p.iou_threshold.size()) - model = model_t3; - else if (!p.score_threshold.size()) - model = model_t4; - else - model = model_t5; - - std::string inBoxes; - std::string inScores; - std::string out; - - inBoxes += "" + std::to_string(p.scoresDim[0]) + "\n"; - inBoxes += "" + std::to_string(p.scoresDim[2]) + "\n"; - inBoxes += "4"; - - - for (auto& scr : p.scoresDim) { - inScores += ""; - inScores += std::to_string(scr) + "\n"; - } - - out += "" + std::to_string(p.num_selected_indices) + "\n"; - out += "3"; - - REPLACE_WITH_STR(model, "_IBOXES_", inBoxes); - REPLACE_WITH_STR(model, "_ISCORES_", inScores); - REPLACE_WITH_STR(model, "_IOUT_", out); - REPLACE_WITH_NUM(model, "_CPB_", p.center_point_box); - REPLACE_WITH_NUM(model, "_SRD_", p.sort_result_descending); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - nmsTF_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - //std::cout << model << std::endl; - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Input - InferenceEngine::BlobMap srcs; - - // Input Boxes - InferenceEngine::SizeVector boxesDim = {p.scoresDim[0], p.scoresDim[2], 4}; - InferenceEngine::Blob::Ptr srcBoxes = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, boxesDim, InferenceEngine::TensorDesc::getLayoutByDims(boxesDim) }); - srcBoxes->allocate(); - for (size_t i = 0; i < p.boxes.size(); i++) { - static_cast(srcBoxes->buffer())[i] = static_cast(p.boxes[i]); - } - //memcpy(srcBoxes->buffer(), &p.boxes[0], sizeof(float)*boxes.size()); - auto * srcBoxesPtr = 
dynamic_cast*>(srcBoxes.get()); - if (srcBoxesPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - srcs.insert(std::pair("InputBoxes", srcBoxes)); - - // Input Scores - InferenceEngine::Blob::Ptr srcScores = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.scoresDim, InferenceEngine::TensorDesc::getLayoutByDims(p.scoresDim) }); - srcScores->allocate(); - for (size_t i = 0; i < p.scores.size(); i++) { - static_cast(srcScores->buffer())[i] = static_cast(p.scores[i]); - } - auto * srcScoresPtr = dynamic_cast*>(srcScores.get()); - if (srcScoresPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - srcs.insert(std::pair("InputScores", srcScores)); - - // Input BoxesPerClass - InferenceEngine::Blob::Ptr srcBoxesPerClass; - InferenceEngine::Blob::Ptr srcIouThr; - InferenceEngine::Blob::Ptr srcScoreThr; - if (p.max_output_boxes_per_class.size()) { - srcBoxesPerClass = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, {}, InferenceEngine::TensorDesc::getLayoutByDims({}) }); - srcBoxesPerClass->allocate(); - memcpy(static_cast(srcBoxesPerClass->buffer()), &p.max_output_boxes_per_class[0], sizeof(int32_t)); - auto * srcBoxesPerClassPtr = dynamic_cast*>(srcBoxesPerClass.get()); - if (srcBoxesPerClassPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - srcs.insert(std::pair("InputBoxesPerClass", srcBoxesPerClass)); - } - - // Input IouThr - if (p.iou_threshold.size()) { - srcIouThr = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, {}, InferenceEngine::TensorDesc::getLayoutByDims({}) }); - srcIouThr->allocate(); - memcpy(static_cast(srcIouThr->buffer()), &p.iou_threshold[0], sizeof(float)); - auto * srcIouThrPtr = dynamic_cast*>(srcIouThr.get()); - if (srcIouThrPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - srcs.insert(std::pair("InputIouThr", srcIouThr)); - } - - // Input ScoreThr - if (p.score_threshold.size()) { - srcScoreThr = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, {}, InferenceEngine::TensorDesc::getLayoutByDims({}) }); - srcScoreThr->allocate(); - memcpy(static_cast(srcScoreThr->buffer()), &p.score_threshold[0], sizeof(float)); - auto * srcScoreThrPtr = dynamic_cast*>(srcScoreThr.get()); - if (srcScoreThrPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - srcs.insert(std::pair("InputScoreThr", srcScoreThr)); - } - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Infer - graph.Infer(srcs, outputBlobs); - - // Output Reference - if (!p.ref.size()) { - InferenceEngine::TBlob selected_indices_ref(item.second->getTensorDesc()); - selected_indices_ref.allocate(); - ref_nms(*srcBoxesPtr, *srcScoresPtr, selected_indices_ref, p); - compare(*output, selected_indices_ref); - } else { - // Check results - if (p.ref.size() != output->size()) - FAIL() << "Wrong result vector size!"; - if (memcmp((*output).data(), &p.ref[0], output->byteSize()) != 0) - FAIL() << "Wrong result with compare TF reference!"; - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtNonMaxSuppressionTFTests, TestsNonMaxSuppression) {} - -static std::vector boxes = { 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 
11.1, 0.0, 100.0, 1.0, 101.0 }; -static std::vector scores = { 0.9f, 0.75f, 0.6f, 0.95f, 0.5f, 0.3f }; -static std::vector reference = { 0,0,3,0,0,0,0,0,5 }; - -INSTANTIATE_TEST_CASE_P( - TestsNonMaxSuppression, MKLDNNCPUExtNonMaxSuppressionTFTests, - ::testing::Values( -// Params: center_point_box, sort_result_descending, scoresDim, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, num_selected_indices, ref - - nmsTF_test_params{ 1, 1, {1,1,6}, { 0.5f, 0.5f, 1.0f, 1.0f,0.5f, 0.6f, 1.0f, 1.0f,0.5f, 0.4f, 1.0f, 1.0f,0.5f, 10.5f, 1.0f, 1.0f, 0.5f, 10.6f, 1.0f, 1.0f, 0.5f, 100.5f, 1.0f, 1.0f }, - scores,{ 3 },{ 0.5f },{ 0.f }, 3, reference }, /*nonmaxsuppression_center_point_box_format*/ - - nmsTF_test_params{ 0, 1, {1,1,6}, { 1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1, 0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0 }, - scores,{ 3 },{ 0.5 },{ 0.0 }, 3, reference }, /*nonmaxsuppression_flipped_coordinates*/ - - nmsTF_test_params{ 0, 1, { 1,1,10 },{ 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0 }, - { 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9 },{ 3 },{ 0.5 },{ 0.0 }, 1,{ 0,0,0 } }, /*nonmaxsuppression_identical_boxes*/ - - nmsTF_test_params{ 0, 1, { 1,1,6 }, boxes, scores,{ 2 },{ 0.5 },{ 0.0 }, 2,{ 0,0,3,0,0,0 } }, /*nonmaxsuppression_limit_output_size*/ - - nmsTF_test_params{ 0, 1,{ 1,1,1 },{ 0.0, 0.0, 1.0, 1.0 }, { 0.9 },{ 3 },{ 0.5 },{ 0.0 }, 1, { 0,0,0 } }, /*nonmaxsuppression_single_box*/ - - nmsTF_test_params{ 0, 1, { 1,1,6 }, boxes, scores, { 3 }, { 0.5 }, { 0.0 }, 3, reference }, /*nonmaxsuppression_suppress_by_IOU*/ - - nmsTF_test_params{ 0, 1, { 1,1,6 }, boxes, scores, { 3 }, { 0.5 }, { 0.4 }, 2, { 0,0,3,0,0,0 } }, /*nonmaxsuppression_suppress_by_IOU_and_scores*/ - - nmsTF_test_params{ 0, 0, { 2,1,6 },{ 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0 }, - { 0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3 },{ 2 },{ 0.5 },{ 0.0 }, 4,{ 0,0,3,0,0,0,1,0,3,1,0,0 } }, /*nonmaxsuppression_two_batches*/ - - nmsTF_test_params{ 0, 1, { 2,1,6 },{ 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0, - 0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9, 0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0 }, - { 0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3 },{ 2 },{ 0.5 },{ 0.0 }, 4,{ 0,0,3,1,0,3,0,0,0,1,0,0 } }, /*nonmaxsuppression_two_batches*/ - - nmsTF_test_params{ 0, 0, { 1,2,6 }, boxes, - { 0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3 },{ 2 },{ 0.5 },{ 0.0 }, 4,{ 0,0,3,0,0,0,0,1,3,0,1,0 } }, /*nonmaxsuppression_two_classes*/ - - nmsTF_test_params{ 0, 1, { 1,2,6 }, boxes, - { 0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3 },{ 2 },{ 0.5 },{ 0.0 }, 4,{ 0,0,3,0,1,3,0,0,0,0,1,0 } }, /*nonmaxsuppression_two_classes*/ - - nmsTF_test_params{ 0, 1, { 1,1,6 }, boxes, scores, { 3 }, { 0.5 }, {}, 3, reference }, /*nonmaxsuppression_no_score_threshold*/ - - nmsTF_test_params{ 0, 1, { 1,1,6 }, boxes, scores, { 3 }, {}, {}, 3, { 0,0,3,0,0,0,0,0,1 } }, /*nonmaxsuppression_no_iou_threshold_and_score_threshold*/ - - 
nmsTF_test_params{ 0, 1, { 1,1,6 }, boxes, scores, {}, {}, {}, 3, {} } /*nonmaxsuppression_no_max_output_boxes_per_class_and_iou_threshold_and_score_threshold*/ -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/normalize_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/normalize_tests.cpp deleted file mode 100644 index 8ad5d5bbd89..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/normalize_tests.cpp +++ /dev/null @@ -1,640 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "common_test_utils/data_utils.hpp" -#include "ir_gen_helper.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -#include -#include - - -using namespace InferenceEngine; -using namespace ::testing; -using namespace std; -using namespace single_layer_tests; - -using namespace Extensions; -using namespace ::Cpu; - -namespace { - -OV_CC_DOMAINS(NormalizeTests); - -} // namespace - -struct normalize_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in; - int across_spatial; - int channel_shared; - float eps; - bool isBlockedFormat; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - Precision prec_in; - Precision prec_out; - - std::vector> comp; -}; - -extern InferenceEngine::IExtensionPtr make_FakeExtensions(); - -template -void ref_normalize(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst, normalize_test_params prm, const float *weights) { - int B = static_cast(src.getTensorDesc().getDims()[0]); - int C = static_cast(src.getTensorDesc().getDims()[1]); - int H = static_cast(src.getTensorDesc().getDims()[2]); - int W = static_cast(src.getTensorDesc().getDims()[3]); - - float eps = prm.eps; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (int b = 0; b < B; b++) { - const data_t *src_data_b = src_data + b * C * H * W; - data_t *dst_data_b = dst_data + b * C * H * W; - if (prm.across_spatial) { - float sqrt_sum = 0.f; - for (int i = 0; i < H * W * C; i++) { - sqrt_sum += (src_data_b[i] * src_data_b[i]); - } - - sqrt_sum = std::sqrt(sqrt_sum) + eps; - - for (int c = 0; c < C; c++) { - float s = prm.channel_shared ? weights[0] : weights[c]; - for (int hw = 0; hw < H * W; hw++) { - float dst_value = (src_data_b[c * H * W + hw] / sqrt_sum) * s; - if (prm.prec_out == Precision::FP32) { - dst_data_b[c * H * W + hw] = dst_value; - } else if (prm.prec_out == Precision::U8) { - dst_data_b[c * H * W + hw] = (dst_value > 0) ? roundf(dst_value) : 0; - } else if (prm.prec_out == Precision::I8) { - dst_data_b[c * H * W + hw] = roundf(dst_value); - } - } - } - } else { - for(int i = 0; i 0) ? 
roundf(dst_value) : 0; - } else if (prm.prec_out == Precision::I8) { - dst_data_b_c[offset] = roundf(dst_value); - } - } - } - } - } -} - -class MKLDNNCPUExtNormalizeTests: public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - -)V0G0N"; - - std::string getModel(normalize_test_params p) { - std::string model = model_t; - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - - REPLACE_WITH_NUM(model, "_AS_", p.across_spatial); - REPLACE_WITH_NUM(model, "_CS_", p.channel_shared); - - REPLACE_WITH_NUM(model, "_WS_", p.in.c*sizeof(float)); - REPLACE_WITH_NUM(model, "_EPS_", p.eps); - - if (p.isBlockedFormat) - REPLACE_WITH_STR(model, "_FL_", "FakeLayerBLK"); - else - REPLACE_WITH_STR(model, "_FL_", "FakeLayerPLN"); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - normalize_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - auto defaultExtensions = std::make_shared(); - extMgr->AddExtension(defaultExtensions); - extMgr->AddExtension(make_FakeExtensions()); - - size_t weightSize = p.in.c*sizeof(float); - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {weightSize}, InferenceEngine::C }); - weights->allocate(); - float center = 0; - float ampl = 100; - float omega = 0.5; - CommonTestUtils::fill_data_sine( weights->data().as(), weights->size() / sizeof(float), center, ampl, omega); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network, extMgr); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getName() == "normalize") { - ASSERT_LE(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - ASSERT_LE(3, nodes.size()); - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - 
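// The ref_normalize reference above reduces, for across_spatial == 1, to an L2
// normalization over the whole C*H*W volume followed by a per-channel (or shared,
// when channel_shared == 1) scale: y = x / (sqrt(sum(x^2)) + eps) * scale.
// The across_spatial == 0 branch applies the same norm per spatial position across
// channels instead. A minimal sketch of the across_spatial branch only (illustrative;
// the helper name is not from the deleted file):
#include <cmath>
#include <cstddef>

inline void normalize_across_spatial(float* data, std::size_t count, float eps, float scale) {
    float sum_sq = 0.f;
    for (std::size_t i = 0; i < count; ++i)
        sum_sq += data[i] * data[i];
    const float denom = std::sqrt(sum_sq) + eps;  // eps is added after the sqrt, as in the reference
    for (std::size_t i = 0; i < count; ++i)
        data[i] = data[i] / denom * scale;
}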
outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_normalize(*srcPtr, dst_ref, p, weights->readOnly().as()); - compare(*output, dst_ref); - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtNormalizeTests, TestsNormalize) {} - -INSTANTIATE_TEST_CASE_P( - TestsNormalize, MKLDNNCPUExtNormalizeTests, - ::testing::Values( - normalize_test_params{{1, 22, 129, 323}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 22, 129, 323}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{5, 1, 128, 256}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{5, 1, 128, 256}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 2, 129, 323}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 2, 129, 323}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 1, 21, 21}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 1, 21, 21}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 1, 21, 21}, true, true, 0.001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 35, 101, 127}, true, true, 0.001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 35, 101, 127}, true, false, 0.001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 128, 320, 320}, false, true, 0.001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 22, 129, 323}, false, false, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 22, 129, 323}, false, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{5, 1, 128, 256}, false, false, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{5, 1, 128, 256}, false, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 2, 129, 323}, true, false, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 2, 129, 323}, true, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 1, 21, 21}, true, false, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 1, 21, 21}, true, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 1, 21, 21}, true, true, 0.001f, true, 3, 
MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 35, 101, 127}, true, true, 0.001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 35, 101, 127}, true, false, 0.001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{1, 128, 320, 320}, false, true, 0.001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 } - )); - -static std::string precToStr (Precision prec) { - return prec == Precision::U8 ? "U8" : prec == Precision::I8 ? "I8" : "FP32"; -} - -template -static void fill_int_data(data_t *data, int size, bool is_signed) { - for (int i = 0 ; i < size; i++) { - data[i] = i * 13 % 21 - 10 * is_signed; - } -} - -class FakeLayerImpl_Normalize: public Cpu::ExtLayerBase, - public WithParamInterface { -public: - explicit FakeLayerImpl_Normalize(const CNNLayer* layer) { - try { - is_blocked = layer->GetParamAsBool("is_blocked"); - addConfig(layer); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - bool is_blocked; - - void addConfig(const CNNLayer* layer) { - LayerConfig config; - - // Fill tensor parameters into config - auto fill_port = [&] (std::vector& port, const DataPtr& data) { - auto div_up = [](const int a, const int b) -> int { - if (!b) - return 0; - return (a + b - 1) / b; - }; - if (!data) IE_THROW() << "Cannot get input data!"; - - DataConfig dataConfig; - dataConfig.inPlace = 0; - dataConfig.constant = false; - - const TensorDesc& data_desc = data->getTensorDesc(); - const SizeVector& data_dims = data_desc.getDims(); - - InferenceEngine::Precision precision = data_desc.getPrecision(); - if (is_blocked) { - int blk_size = InferenceEngine::with_cpu_x86_avx512f() ? 16 : 8; - - std::vector blocks = data_dims; - std::vector order(blocks.size()); - for (size_t i = 0; i < order.size(); i++) order[i] = i; - - order.push_back(1); - blocks[1] = div_up(blocks[1], blk_size); - blocks.push_back(blk_size); - - dataConfig.desc = TensorDesc(precision, data_dims, {blocks, order}); - } else { - dataConfig.desc = TensorDesc(precision, data_dims, data_dims.size() == 5 ? 
NDHWC : NHWC); - } - - port.push_back(dataConfig); - }; - - fill_port(config.inConfs, layer->insData[0].lock()); - fill_port(config.outConfs, layer->outData[0]); - config.inConfs[0].desc.setPrecision(config.outConfs[0].desc.getPrecision()); - confs.push_back(config); - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - return OK; - } -}; - -class MKLDNNCPUExtNormalizeTests_Blocked: public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - - - -)V0G0N"; - - std::string getModel(normalize_test_params p) { - std::string model = model_t; - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - - REPLACE_WITH_NUM(model, "_AS_", p.across_spatial); - REPLACE_WITH_NUM(model, "_CS_", p.channel_shared); - - REPLACE_WITH_NUM(model, "_WS_", p.in.c*sizeof(float)); - REPLACE_WITH_NUM(model, "_EPS_", p.eps); - REPLACE_WITH_STR(model, "_PREC_IN_", precToStr(p.prec_in)); - REPLACE_WITH_STR(model, "_PREC_OUT_", precToStr(p.prec_out)); - REPLACE_WITH_NUM(model, "_IS_BLOCKED_", p.isBlockedFormat); - - model = IRTemplateGenerator::getIRTemplate("Normalize_Only", {p.in.n, p.in.c, p.in.h, p.in.w}, "FP32", model, edges_t, 7); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - normalize_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - - size_t weightSize = p.in.c*sizeof(float); - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {weightSize}, InferenceEngine::C }); - weights->allocate(); - float center = 0; - float ampl = 100; - float omega = 0.5; - CommonTestUtils::fill_data_sine( weights->data().as(), weights->size() / sizeof(float), center, ampl, omega); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - auto manager = std::make_shared(); - { - auto defaultExt = std::make_shared(); - defaultExt->layersFactory.registerNodeIfRequired(NormalizeTests, FakeLayer_Normalize, "FakeLayer_Normalize", Cpu::ImplFactory); - manager->AddExtension(defaultExt); - } - graph.CreateGraph(network, manager); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getName() == "normalize") { - ASSERT_LE(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, NCHW}); - src->allocate(); - if (p.prec_in == Precision::U8) { - 
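// [Editorial annotation, not part of the deleted file] A hedged reading of the helper used
// just below: fill_int_data() (defined earlier in this file) writes
//     data[i] = i * 13 % 21 - 10 * is_signed;
// so this U8 branch gets repeating integers in [0, 20], the I8 branch below gets the same
// pattern shifted into [-10, 10], and the FP32 fallback uses the generic fill_data() helper.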
fill_int_data(src->buffer().as(), src->size(), false); - } else if (p.prec_in == Precision::I8) { - fill_int_data(src->buffer().as(), src->size(), true); - } else { - fill_data(src->buffer(), src->size()); - } - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_normalize(*srcPtr, dst_ref, p, weights->readOnly().as()); - compare(*output, dst_ref); - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtNormalizeTests_Blocked, TestsNormalize) {} - -INSTANTIATE_TEST_CASE_P( - TestsNormalize, MKLDNNCPUExtNormalizeTests_Blocked, - ::testing::Values( - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 33, 129, 323}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - normalize_test_params{{2, 33, 129, 323}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - normalize_test_params{{2, 33, 129, 323}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - normalize_test_params{{2, 33, 129, 323}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, 
Precision::I8, Precision::I8 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - normalize_test_params{{2, 33, 129, 323}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - normalize_test_params{{2, 33, 129, 323}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - normalize_test_params{{2, 33, 129, 323}, true, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, false, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::FP32 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::I8 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::FP32, Precision::U8 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::I8 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::I8, Precision::FP32 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.000001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.000001f, true, 3, 
MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::U8 }, - - normalize_test_params{{2, 33, 129, 323}, true, true, 0.0001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 }, - normalize_test_params{{2, 67, 77, 78}, false, false, 0.0001f, true, 3, MKLDNNPlugin::impl_desc_type::unknown, Precision::U8, Precision::FP32 } - )); \ No newline at end of file diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp deleted file mode 100644 index aec442364a7..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp +++ /dev/null @@ -1,854 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" - -#include "single_layer_common.hpp" -#include - -using namespace ::testing; -using namespace InferenceEngine; - -struct one_hot_base_params { - struct { size_t n, c, h, w; } in; - struct { size_t d, n, c, h, w; } out; - int axis; - unsigned int depth; - float on, off; -}; - -struct one_hot_test_params : one_hot_base_params { - std::string device_name; - - one_hot_test_params(std::string name, one_hot_base_params params) : - one_hot_base_params(params), device_name(name) {} -}; - -class OneHotOnly1dTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - 1 - - - - - - - - - - 1 - - - - - _OW_ - - - - - l - - - -)V0G0N"; - - std::string getModel(one_hot_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - REPLACE_WITH_NUM(model, "_DEPTH_", p.depth); - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - - return model; - } - void ref_one_hot_1d(InferenceEngine::Blob &src, InferenceEngine::Blob &dst, one_hot_test_params p) - { - float *src_ptr = src.buffer().as(); - std::size_t src_size = src.size(); - float *dst_ptr = dst.buffer().as(); - std::size_t dst_size = dst.size(); - - for (int ow = 0; ow < p.out.w; ow++) { - std::size_t src_offset = 0; - std::size_t dst_offset = ow; - - int hot_axis = -1; - if (p.axis == -1) { - hot_axis = ow; - src_offset = 0; - } else if (p.axis == 0) { - hot_axis = ow; - src_offset = 0; - } - int v = src_ptr[src_offset]; - - dst_ptr[dst_offset] = (v == hot_axis) ? 
p.on : p.off; - } - } - -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - one_hot_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - try { - network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); - } catch (InferenceEngine::Exception &e) { - FAIL() << e.what(); - } catch (std::exception &e) { - FAIL() << e.what(); - } - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - SizeVector dims_src = {}; - TBlob src({Precision::FP32, dims_src, Layout::SCALAR}); - src.allocate(); - float * s = src.buffer().as(); - s[0] = 2; - - ref_one_hot_1d(src, dst_ref, p); - - InferenceEngine::Blob::Ptr pSrc = make_shared_blob(src); - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", pSrc)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - - - -class OneHotOnly2dTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - _IW_ - - - - - - - - - - _IW_ - - - - - _OH_ - _OW_ - - - - - l - - - -)V0G0N"; - - std::string getModel(one_hot_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - REPLACE_WITH_NUM(model, "_DEPTH_", p.depth); - - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - - return model; - } - void ref_one_hot_2d(InferenceEngine::Blob &src, InferenceEngine::Blob &dst, one_hot_test_params p) - { - float *src_ptr = src.buffer().as(); - std::size_t src_size = src.size(); - float *dst_ptr = dst.buffer().as(); - std::size_t dst_size = dst.size(); - - for (int oh = 0; oh < p.out.h; oh++) { - for (int ow = 0; ow < p.out.w; ow++) { - std::size_t src_offset = 0; - - std::size_t dst_offset = ow + p.out.w * oh; - - int hot_axis = -1; - if (p.axis == -1) { - hot_axis = ow; - src_offset = oh; - } else if (p.axis == 0) { - hot_axis = oh; - src_offset = ow; - } else if (p.axis == 1) { - hot_axis = ow; - src_offset = oh; - } - int v = src_ptr[src_offset]; - - dst_ptr[dst_offset] = (v == hot_axis) ? 
p.on : p.off; - } - } - } - -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - one_hot_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - try { - network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); - } catch (InferenceEngine::Exception &e) { - FAIL() << e.what(); - } catch (std::exception &e) { - FAIL() << e.what(); - } - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - SizeVector dims_src = {p.in.w}; - TBlob src({Precision::FP32, dims_src, Layout::C}); - src.allocate(); - float * s = src.buffer().as(); - for (int i = 0; i < src.size(); ++i) - s[i] = -1; - s[0] = 3; - s[2] = 2; - - // Check results - InferenceEngine::SizeVector out_dims = {p.out.w, p.out.h}; - ref_one_hot_2d(src, dst_ref, p); - - InferenceEngine::Blob::Ptr pSrc = make_shared_blob(src); - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", pSrc)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - - -class OneHotOnly3dTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - _IH_ - _IW_ - - - - - - - - - - _IH_ - _IW_ - - - - - _OC_ - _OH_ - _OW_ - - - - - l - - - -)V0G0N"; - - std::string getModel(one_hot_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - REPLACE_WITH_NUM(model, "_DEPTH_", p.depth); - REPLACE_WITH_NUM(model, "_ON_VALUE_", p.on); - REPLACE_WITH_NUM(model, "_OFF_VALUE_", p.off); - - REPLACE_WITH_NUM(model, "_OC_", p.out.c); - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - - return model; - } - void ref_one_hot_3d(InferenceEngine::Blob &src, InferenceEngine::Blob &dst, one_hot_test_params p) - { - float *src_ptr = src.buffer().as(); - std::size_t src_size = src.size(); - float *dst_ptr = dst.buffer().as(); - std::size_t dst_size = dst.size(); - - for (int oc = 0; oc < p.out.c; oc++) { - for (int oh = 0; oh < p.out.h; oh++) { - for (int ow = 0; ow < p.out.w; ow++) { - std::size_t src_offset = 0; - - std::size_t dst_offset = ow + p.out.w * oh + p.out.w * p.out.h * oc; - - int hot_axis = -1; - if (p.axis == -1) { - hot_axis = ow; - src_offset = oh + p.in.w * oc; - } else if (p.axis == 0) { - hot_axis = oc; - src_offset = ow + p.in.w * oh; - } else if (p.axis == 1) { - hot_axis = oh; - src_offset = ow + p.in.w * oc; - } - int v = src_ptr[src_offset]; - - dst_ptr[dst_offset] = (v == hot_axis) ? 
p.on : p.off; - } - } - } - } - -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - one_hot_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - try { - network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); - } catch (InferenceEngine::Exception &e) { - FAIL() << e.what(); - } catch (std::exception &e) { - FAIL() << e.what(); - } - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - SizeVector dims_src = {p.in.h, p.in.w}; - TBlob src({Precision::FP32, dims_src, Layout::HW}); - src.allocate(); - float * s = src.buffer().as(); - for (int i = 0; i < src.size(); ++i) - s[i] = -1; - s[0] = 3; - s[4] = 2; - - // Check results - InferenceEngine::SizeVector out_dims = {p.out.w, p.out.h, p.out.c}; - ref_one_hot_3d(src, dst_ref, p); - - InferenceEngine::Blob::Ptr pSrc = make_shared_blob(src); - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", pSrc)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -class OneHotOnly4dTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - _IC_ - _IH_ - _IW_ - - - - - - - - - - _IC_ - _IH_ - _IW_ - - - - - _ON_ - _OC_ - _OH_ - _OW_ - - - - - l - - - -)V0G0N"; - - std::string getModel(one_hot_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - REPLACE_WITH_NUM(model, "_DEPTH_", p.depth); - - REPLACE_WITH_NUM(model, "_ON_", p.out.n); - REPLACE_WITH_NUM(model, "_OC_", p.out.c); - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - - return model; - } -void ref_one_hot_4d(InferenceEngine::Blob &src, InferenceEngine::Blob &dst, one_hot_test_params p) -{ - float *src_ptr = src.buffer().as(); - std::size_t src_size = src.size(); - float *dst_ptr = dst.buffer().as(); - std::size_t dst_size = dst.size(); - - for (int ob = 0; ob < p.out.n; ob++) { - for (int oc = 0; oc < p.out.c; oc++) { - for (int oh = 0; oh < p.out.h; oh++) { - for (int ow = 0; ow < p.out.w; ow++) { - std::size_t src_offset = 0; - - std::size_t dst_offset = ow + p.out.w * oh + p.out.w * p.out.h * oc + p.out.w * p.out.h * p.out.c * ob; - - int hot_axis = -1; - if (p.axis == -1) { - hot_axis = ow; - src_offset = oh + p.in.w * oc + p.in.w * p.in.h * ob; - } else if (p.axis == 0) { - hot_axis = ob; - src_offset = ow + p.in.w * oh + p.in.w * p.in.h * oc; - } else if (p.axis == 1) { - hot_axis = oc; - src_offset = ow + p.in.w * oh + p.in.w * p.in.h * ob; - } else if (p.axis == 2) { - hot_axis = oh; - src_offset = ow + p.in.w * oc + p.in.w * p.in.h * ob; - } - int v = src_ptr[src_offset]; - - dst_ptr[dst_offset] = (v == hot_axis) ? 
p.on : p.off; - } - } - } - } -} -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - one_hot_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - try { - network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); - } catch (InferenceEngine::Exception &e) { - FAIL() << e.what(); - } catch (std::exception &e) { - FAIL() << e.what(); - } - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - SizeVector dims_src = {p.in.c, p.in.h, p.in.w}; - - TBlob src({Precision::FP32, dims_src, Layout::CHW}); - src.allocate(); - - float * s = src.buffer().as(); - for (int i = 0; i < src.size(); ++i) - s[i] = -1; - s[0] = 3; - s[4] = 2; - - // Check results - InferenceEngine::SizeVector out_dims = {p.out.w, p.out.h, p.out.c, p.out.n}; - ref_one_hot_4d(src, dst_ref, p); - - InferenceEngine::Blob::Ptr pSrc = make_shared_blob(src); - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", pSrc)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - - -class OneHotOnly5dTest: public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _ON_ - _OC_ - _OD_ - _OH_ - _OW_ - - - - - l - - - -)V0G0N"; - - std::string getModel(one_hot_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - REPLACE_WITH_NUM(model, "_DEPTH_", p.depth); - - REPLACE_WITH_NUM(model, "_ON_", p.out.n); - REPLACE_WITH_NUM(model, "_OC_", p.out.c); - REPLACE_WITH_NUM(model, "_OD_", p.out.d); - REPLACE_WITH_NUM(model, "_OH_", p.out.h); - REPLACE_WITH_NUM(model, "_OW_", p.out.w); - - return model; - } -void ref_one_hot_5d(InferenceEngine::Blob &src, InferenceEngine::Blob &dst, one_hot_test_params p) -{ - float *src_ptr = src.buffer().as(); - std::size_t src_size = src.size(); - float *dst_ptr = dst.buffer().as(); - std::size_t dst_size = dst.size(); - - for (int ob = 0; ob < p.out.n; ob++) { - for (int oc = 0; oc < p.out.c; oc++) { - for (int od = 0; od < p.out.d; od++) { - for (int oh = 0; oh < p.out.h; oh++) { - for (int ow = 0; ow < p.out.w; ow++) { - std::size_t src_offset = 0; - - std::size_t dst_offset = ow + p.out.w * oh + p.out.w * p.out.h * od \ - + p.out.w * p.out.h * p.out.d * oc + p.out.w * p.out.h * p.out.d * p.out.c * ob; - - int hot_axis = -1; - if (p.axis == -1 || p.axis == 4) { - hot_axis = ow; - src_offset = oh + p.in.w * od + p.in.w * p.in.h * oc + p.in.w * p.in.h * p.in.c * ob; - } else if (p.axis == 0) { - hot_axis = ob; - src_offset = ow + p.in.w * oh + p.in.w * p.in.h * od + p.in.w * p.in.h * p.in.c * oc; - } else if (p.axis == 1) { - hot_axis = oc; - src_offset = ow + p.in.w * oh + p.in.w * 
p.in.h * od + p.in.w * p.in.h * p.in.c * ob; - } else if (p.axis == 2) { - hot_axis = od; - src_offset = ow + p.in.w * oh + p.in.w * p.in.h * oc + p.in.w * p.in.h * p.in.c * ob; - } else if (p.axis == 3) { - hot_axis = oh; - src_offset = ow + p.in.w * od + p.in.w * p.in.h * oc + p.in.w * p.in.h * p.in.c * ob; - } - - int v = src_ptr[src_offset]; - dst_ptr[dst_offset] = (v == hot_axis) ? p.on : p.off; - } - } - } - } - } -} -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - one_hot_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - try { - network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); - } catch (InferenceEngine::Exception &e) { - FAIL() << e.what(); - } catch (std::exception &e) { - FAIL() << e.what(); - } - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - - TBlob src({Precision::FP32, dims_src, Layout::NCHW}); - src.allocate(); - - float * s = src.buffer().as(); - for (int i = 0; i < src.size(); ++i) - s[i] = -1; - s[3] = 3; - s[7] = 2; - - - - // Check results - ref_one_hot_5d(src, dst_ref, p); - - InferenceEngine::Blob::Ptr pSrc = make_shared_blob(src); - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", pSrc)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -// 0d -> 1d, depth -#define case_1d_0 one_hot_base_params({ {0, 0, 0, 0}, {0, 0, 0, 0, 3},-1, 3, 1.0f, 0.0f }) -#define case_1d_1 one_hot_base_params({ {0, 0, 0, 0}, {0, 0, 0, 0, 4}, 0, 4, 1.0f, 0.0f }) -// 1d -> 2d, axis default -#define case_2d_0 one_hot_base_params({ {0, 0, 0, 3}, {0, 0, 0, 3, 6},-1, 6, 1.0f, 0.0f }) -#define case_2d_1 one_hot_base_params({ {0, 0, 0, 3}, {0, 0, 0, 6, 3}, 0, 6, 1.0f, 0.0f }) -#define case_2d_2 one_hot_base_params({ {0, 0, 0, 3}, {0, 0, 0, 3, 6}, 1, 6, 1.0f, 0.0f }) -// 2d -> 3d, on_value, off_value -#define case_3d_0 one_hot_base_params({ {0, 0, 3, 2}, {0, 0, 3, 2, 4},-1, 4, 2.0f, -1.0f }) -#define case_3d_1 one_hot_base_params({ {0, 0, 3, 2}, {0, 0, 4, 3, 2}, 0, 4, 2.0f, -1.0f }) -#define case_3d_2 one_hot_base_params({ {0, 0, 3, 2}, {0, 0, 3, 4, 2}, 1, 4, 2.0f, -1.0f }) -// 3d -> 4d -#define case_4d_0 one_hot_base_params({ {0, 1, 3, 2}, {0, 1, 3, 2, 4},-1, 4, 1.0f, 0.0f }) -#define case_4d_1 one_hot_base_params({ {0, 1, 3, 2}, {0, 4, 1, 3, 2}, 0, 4, 1.0f, 0.0f }) -#define case_4d_2 one_hot_base_params({ {0, 1, 3, 2}, {0, 1, 4, 3, 2}, 1, 4, 1.0f, 0.0f }) -#define case_4d_3 one_hot_base_params({ {0, 1, 3, 2}, {0, 1, 3, 4, 2}, 2, 4, 1.0f, 0.0f }) -// 4d -> 5d IE layouts are NCHW -> NCDHW, param layouts are {n, c , h, w} {d, n, c, h ,w} -#define case_5d_0 one_hot_base_params({ {1, 3, 2, 3}, {2, 1, 3, 3, 4},-1, 4, 1.0f, 0.0f }) -#define case_5d_1 one_hot_base_params({ {1, 3, 2, 3}, {3, 4, 1, 2, 3}, 0, 4, 1.0f, 0.0f }) -#define case_5d_2 one_hot_base_params({ {1, 3, 2, 3}, {3, 1, 4, 2, 3}, 1, 4, 1.0f, 0.0f 
}) -#define case_5d_3 one_hot_base_params({ {1, 3, 2, 3}, {4, 1, 3, 2, 3}, 2, 4, 1.0f, 0.0f }) -#define case_5d_4 one_hot_base_params({ {1, 3, 2, 3}, {2, 1, 3, 4, 3}, 3, 4, 1.0f, 0.0f }) - -one_hot_test_params one_hot_only_1d_test_cases[] = { - one_hot_test_params("CPU", case_1d_0), - one_hot_test_params("CPU", case_1d_1) -}; - -one_hot_test_params one_hot_only_2d_test_cases[] = { - one_hot_test_params("CPU", case_2d_0), - one_hot_test_params("CPU", case_2d_1), - one_hot_test_params("CPU", case_2d_2), -}; - -one_hot_test_params one_hot_only_3d_test_cases[] = { - one_hot_test_params("CPU", case_3d_0), - one_hot_test_params("CPU", case_3d_1), - one_hot_test_params("CPU", case_3d_2), -}; -one_hot_test_params one_hot_only_4d_test_cases[] = { - one_hot_test_params("CPU", case_4d_0), - one_hot_test_params("CPU", case_4d_1), - one_hot_test_params("CPU", case_4d_2), - one_hot_test_params("CPU", case_4d_3) -}; - -one_hot_test_params one_hot_only_5d_test_cases[] = { - one_hot_test_params("CPU", case_5d_0), - one_hot_test_params("CPU", case_5d_1), - one_hot_test_params("CPU", case_5d_2), - one_hot_test_params("CPU", case_5d_3), - one_hot_test_params("CPU", case_5d_4) -}; - -TEST_P(OneHotOnly1dTest, TestsOneHot) {} -INSTANTIATE_TEST_CASE_P(TestsOneHot, OneHotOnly1dTest, ::testing::ValuesIn(one_hot_only_1d_test_cases)); - -TEST_P(OneHotOnly2dTest, TestsOneHot) {} -INSTANTIATE_TEST_CASE_P(TestsOneHot, OneHotOnly2dTest, ::testing::ValuesIn(one_hot_only_2d_test_cases)); - -TEST_P(OneHotOnly3dTest, TestsOneHot) {} -INSTANTIATE_TEST_CASE_P(TestsOneHot, OneHotOnly3dTest, ::testing::ValuesIn(one_hot_only_3d_test_cases)); - -TEST_P(OneHotOnly4dTest, TestsOneHot) {} -INSTANTIATE_TEST_CASE_P(TestsOneHot, OneHotOnly4dTest, ::testing::ValuesIn(one_hot_only_4d_test_cases)); - -TEST_P(OneHotOnly5dTest, TestsOneHot) {} -INSTANTIATE_TEST_CASE_P(TestsOneHot, OneHotOnly5dTest, ::testing::ValuesIn(one_hot_only_5d_test_cases)); - diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp deleted file mode 100644 index f4d5c8a5de6..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct range_test_params { - std::string precision; - float start; - float limit; - float delta; - InferenceEngine::SizeVector out_shape; - - std::vector> comp; -}; - -template -void ref_range( - float start, - float limit, - float delta, - InferenceEngine::TBlob &dst -) { - data_t* dst_data = dst.data(); - size_t work_amount_dst = std::floor(std::abs((limit - start) / delta)); - if (work_amount_dst != dst.size()) - FAIL() << "Range indexes exceeds data tensor dimension"; - - data_t dst_value = static_cast(start); - for (size_t iwork = 0; iwork < work_amount_dst; ++iwork, dst_value += static_cast(delta)) { - dst_data[iwork] = dst_value; - } -} - -class MKLDNNCPUExtRangeTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - 1 - - - - - - - 1 - - - - - - - 1 - - - - - - - - 1 - - - 1 - - - 1 - - - - - _OUT_ - - - - - - - - - - -)V0G0N"; - - std::string getModel(range_test_params p) { - std::string 
model = model_t; - std::string out_shape; - - REPLACE_WITH_STR(model, "_IIDXP_", p.precision); - for (size_t i = 0; i < p.out_shape.size(); i++) { - out_shape += ""; - out_shape += std::to_string(p.out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_OUT_", out_shape); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - range_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - // Input Data - InferenceEngine::Blob::Ptr start_scalar; - InferenceEngine::Blob::Ptr limit_scalar; - InferenceEngine::Blob::Ptr delta_scalar; - std::pair item = *out.begin(); - InferenceEngine::SizeVector scalar_dim(1, 1); - InferenceEngine::BlobMap srcs; - InferenceEngine::SizeVector out_dims; - if (p.precision == "I32") { - start_scalar = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) }); - start_scalar->allocate(); - static_cast(start_scalar->buffer())[0] = static_cast(p.start); - auto * start_scalarPtr = dynamic_cast*>(start_scalar.get()); - if (start_scalarPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - limit_scalar = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) }); - limit_scalar->allocate(); - static_cast(limit_scalar->buffer())[0] = static_cast(p.limit); - auto * limit_scalarPtr = dynamic_cast*>(limit_scalar.get()); - if (limit_scalarPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - delta_scalar = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) }); - delta_scalar->allocate(); - static_cast(delta_scalar->buffer())[0] = static_cast(p.delta); - auto * delta_scalarPtr = dynamic_cast*>(delta_scalar.get()); - if (delta_scalarPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("start", start_scalar)); - srcs.insert(std::pair("limit", limit_scalar)); - srcs.insert(std::pair("delta", delta_scalar)); - - // Output Blob - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_range(p.start, p.limit, p.delta, dst_ref); - - // Infer - graph.Infer(srcs, outputBlobs); - for (int i = 0; i < dst_ref.size(); i++) { - if (dst_ref.data()[i] != (*output).data()[i]) - FAIL() << "The difference between res_ptr[i] and ref_ptr[i]"; - } - } else if (p.precision == "FP32") { - start_scalar = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) }); - start_scalar->allocate(); - static_cast(start_scalar->buffer())[0] = p.start; - auto * start_scalarPtr = dynamic_cast*>(start_scalar.get()); - if (start_scalarPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - limit_scalar = InferenceEngine::make_shared_blob({ 
InferenceEngine::Precision::FP32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) }); - limit_scalar->allocate(); - static_cast(limit_scalar->buffer())[0] = p.limit; - auto * limit_scalarPtr = dynamic_cast*>(limit_scalar.get()); - if (limit_scalarPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - delta_scalar = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) }); - delta_scalar->allocate(); - static_cast(delta_scalar->buffer())[0] = p.delta; - auto * delta_scalarPtr = dynamic_cast*>(delta_scalar.get()); - if (delta_scalarPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("start", start_scalar)); - srcs.insert(std::pair("limit", limit_scalar)); - srcs.insert(std::pair("delta", delta_scalar)); - - // Output Blob - InferenceEngine::Blob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_range(p.start, p.limit, p.delta, dst_ref); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } else { - return; - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtRangeTests, TestsRange) {} - -INSTANTIATE_TEST_CASE_P( - TestsRange, MKLDNNCPUExtRangeTests, - ::testing::Values( -// Params: precision, start, limit, delta, out_shape - range_test_params{ "I32", 3.f, 18.f, 3.f, { 5 } }, - range_test_params{ "I32", 3.f, 1.f, -1.f, { 2 } }, - range_test_params{ "I32", 3.f, -3.f, -1.f, { 6 } }, - range_test_params{ "I32", 0.f, 5.f, 1.f, { 5 } }, - range_test_params{"FP32", 3.f, 18.f, 3.f, { 5 } }, - range_test_params{"FP32", 3.f, 1.f, -.5f, { 4 } }, - range_test_params{"FP32", 3.f, -1.f, -.5f, { 8 } }, - range_test_params{"FP32", 0.f, 5.f, 1.f, { 5 } } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp deleted file mode 100644 index 2e969771006..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp +++ /dev/null @@ -1,535 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct reduce_test_params { - std::string reduce_type; - bool keep_dims; - InferenceEngine::SizeVector in_shape; - std::string inType; - std::vector input_tensor; - std::vector axes_for_reduction; - InferenceEngine::SizeVector out_shape; - std::vector reference; - - std::vector> comp; -}; - -template -void reduce( - const src_t *src_data, - InferenceEngine::SizeVector src_dims, - InferenceEngine::SizeVector srcStrides, - dst_t* dst_data, - InferenceEngine::SizeVector dst_dims, - InferenceEngine::SizeVector dstStrides, - dst_t init_value, - bool keep_dims, - InferenceEngine::SizeVector skip_dims, - F func -) { - size_t i, src_idx, dst_idx; - for (i = 0; i < dstStrides[0] * dst_dims[0]; ++i) - dst_data[i] = init_value; - - InferenceEngine::SizeVector counters(src_dims.size(), 0); - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) { - if 
(keep_dims) - for (i = 0, dst_idx = 0; i < dst_dims.size(); ++i) - dst_idx += (counters[i] % dst_dims[i]) * dstStrides[i]; - else - for (i = 0, dst_idx = 0; i < dst_dims.size(); ++i) - dst_idx += counters[skip_dims[i]] * dstStrides[i]; - - dst_data[dst_idx] = func(dst_data[dst_idx], src_data[src_idx]); - for (int j = src_dims.size() - 1; j >= 0; j--) { - counters[j] = (counters[j] + 1) % src_dims[j]; - if (counters[j] != 0) break; - } - } -} - -template -void ref_reduce( - std::string reduce_type, - InferenceEngine::TBlob &src, - bool keep_dims, - std::vector axes_for_reduction, - InferenceEngine::TBlob &dst, - InferenceEngine::SizeVector &out_dims -) { - size_t i, src_idx, dst_idx; - const src_t *src_data = src.data(); - InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims(); - InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides(); - dst_t* dst_data = dst.data(); - InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims(); - InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides(); - InferenceEngine::SizeVector skip_dims; - - if (!dst_dims.size()) - dst_dims = InferenceEngine::SizeVector(1, 1); - - if (!dstStrides.size()) - dstStrides = InferenceEngine::SizeVector(1, 1); - - if (axes_for_reduction.size() == 0) - FAIL() << " Index vector should be 1 dimension"; - - for (i = 0; i < axes_for_reduction.size(); i++) { - int32_t axis = axes_for_reduction[i]; - if (axis < 0) - axis += src_dims.size(); - - if (axis > src_dims.size()) - FAIL() << " Index to squeeze exceeds data tensor dimension"; - axes_for_reduction[i] = axis; - } - - for (size_t j = 0; j < src_dims.size(); j++) { - bool found = false; - for (size_t axis : axes_for_reduction) - if (j == axis) found = true; - - if (!found) { - out_dims.push_back(src_dims[j]); - if (!keep_dims) skip_dims.push_back(j); - } - else { - if (keep_dims) out_dims.push_back(1); - } - } - - if (reduce_type == "ReduceAnd") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 1, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x && y; } ); - } else { - dst_data[0] = 1; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] = dst_data[0] && src_data[src_idx]; - } - } else if (reduce_type == "ReduceL1") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 0, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x + (std::abs)(y); } ); - } else { - dst_data[0] = 0; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] += (std::abs)(src_data[src_idx]); - } - } else if (reduce_type == "ReduceL2") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 0, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x + y * y; } ); - - for (i = 0; i < dstStrides[0] * dst_dims[0]; ++i) - dst_data[i] = (std::sqrt)(dst_data[i]); - } else { - dst_data[0] = 0.0f; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] += src_data[src_idx] * src_data[src_idx]; - dst_data[0] = sqrt(dst_data[0]); - } - } else if (reduce_type == "ReduceLogSum") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 0, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x + y; }); - - for (i = 0; i < dstStrides[0] * dst_dims[0]; ++i) - dst_data[i] = logf(dst_data[i]); - } else { - dst_data[0] = 0; - for 
(src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] += src_data[src_idx]; - dst_data[0] = logf(dst_data[0]); - } - } else if (reduce_type == "ReduceLogSumExp") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 0, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x + expf(y); }); - - for (i = 0; i < dstStrides[0] * dst_dims[0]; ++i) - dst_data[i] = logf(dst_data[i]); - } else { - dst_data[0] = 0; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] += expf(src_data[src_idx]); - dst_data[0] = logf(dst_data[0]); - } - } else if (reduce_type == "ReduceMax") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, (std::numeric_limits::min)(), keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x > y ? x : y; }); - } else { - dst_data[0] = (std::numeric_limits::min)(); - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] = dst_data[0] > src_data[src_idx] ? dst_data[0] : src_data[src_idx]; - } - } else if (reduce_type == "ReduceMean") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 0, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x + y; }); - float reduced_dims_work_amount = 1.f; - for (size_t axis : axes_for_reduction) { - reduced_dims_work_amount *= static_cast(src_dims[axis]); - } - for (i = 0; i < dstStrides[0] * dst_dims[0]; ++i) - dst_data[i] /= reduced_dims_work_amount; - } else { - dst_data[0] = 0; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] += src_data[src_idx]; - dst_data[0] /= static_cast(srcStrides[0] * src_dims[0]); - } - } else if (reduce_type == "ReduceMin") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, (std::numeric_limits::max)(), keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x < y ? x : y; }); - } else { - dst_data[0] = (std::numeric_limits::max)(); - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] = dst_data[0] < src_data[src_idx] ? 
dst_data[0] : src_data[src_idx]; - } - } else if (reduce_type == "ReduceOr") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 0, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x || y; }); - } else { - dst_data[0] = 0; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] = dst_data[0] || src_data[src_idx]; - } - } else if (reduce_type == "ReduceProd") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 1, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x * y; }); - } else { - dst_data[0] = 1; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] *= src_data[src_idx]; - } - } else if (reduce_type == "ReduceSum") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 0, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x + y; }); - } else { - dst_data[0] = 0; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] += src_data[src_idx]; - } - } else if (reduce_type == "ReduceSumSquare") { - if (out_dims.size()) { - reduce(src_data, src_dims, srcStrides, dst_data, dst_dims, dstStrides, 0, keep_dims, skip_dims, - [](dst_t x, src_t y)->dst_t { return x + y * y; }); - } else { - dst_data[0] = 0; - for (src_idx = 0; src_idx < srcStrides[0] * src_dims[0]; ++src_idx) - dst_data[0] += src_data[src_idx] * src_data[src_idx]; - } - } -} - -class MKLDNNCPUExtReducesTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - - - - - - - _DIM_SIZE_ - - - - - - - - _IN_ - - - _DIM_SIZE_ - - - - - _OUT_ - - - - - - - - - -)V0G0N"; - - std::string getModel(reduce_test_params p) { - std::string model = model_t; - std::string in_shape; - std::string out_shape = ""; - - for (size_t i = 0; i < p.in_shape.size(); i++) { - in_shape += ""; - in_shape += std::to_string(p.in_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_IN_", in_shape); - REPLACE_WITH_STR(model, "_IP_", p.inType); - REPLACE_WITH_STR(model, "_OP_", p.inType); - REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.axes_for_reduction.size()); - REPLACE_WITH_STR(model, "_REDUCE_TYPE_", p.reduce_type); - REPLACE_WITH_NUM(model, "_KEEP_DIMS_", p.keep_dims); - for (size_t i = 0; i < p.out_shape.size(); i++) { - out_shape += ""; - out_shape += std::to_string(p.out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_OUT_", out_shape); - - return model; - } - -protected: - virtual void TearDown() { - } - - template - static void fill_data_dbgval(T *data, size_t size) { - for (size_t i = 0; i < size; i++) { - data[i] = i + 1; - } - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - reduce_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - // Input Data - InferenceEngine::Blob::Ptr src; - InferenceEngine::SizeVector out_dims; - - InferenceEngine::BlobMap srcs; - - InferenceEngine::Blob::Ptr seq_lengthsIdx; - InferenceEngine::SizeVector seq_lengths_dim(1, p.axes_for_reduction.size()); - seq_lengthsIdx = 
InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) }); - seq_lengthsIdx->allocate(); - if (p.axes_for_reduction.size()) - memcpy(static_cast(seq_lengthsIdx->buffer()), &p.axes_for_reduction[0], sizeof(int32_t)*p.axes_for_reduction.size()); - auto * seq_lengthsIdxPtr = dynamic_cast*>(seq_lengthsIdx.get()); - if (seq_lengthsIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("axes_for_reduction", seq_lengthsIdx)); - if (p.inType == "FP32") { - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, p.in_shape, - InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape)}); - src->allocate(); - if (p.input_tensor.size()) - for (int i = 0; i < p.input_tensor.size(); i++) { - static_cast(src->buffer())[i] = static_cast(p.input_tensor[i]); - } - else - fill_data_dbgval(src->buffer(), src->size()); - auto *srcPtr = dynamic_cast *>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - ref_reduce(p.reduce_type, *srcPtr, p.keep_dims, p.axes_for_reduction, dst_ref, out_dims); - if (p.reference.size()) - if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0) - FAIL() << "Wrong result with compare reference vector!"; - // Infer - srcs.insert(std::pair("input", src)); - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } else if (p.inType == "I32") { - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - InferenceEngine::TBlob dst_ref({ InferenceEngine::Precision::I32, p.out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.out_shape) }); - dst_ref.allocate(); - - src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::I32, p.in_shape, - InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape)}); - src->allocate(); - if (p.input_tensor.size()) - for (int i = 0; i < p.input_tensor.size(); i++) { - static_cast(src->buffer())[i] = static_cast(p.input_tensor[i]); - } - else - fill_data_dbgval(src->buffer(), src->size()); - auto *srcPtr = dynamic_cast *>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - ref_reduce(p.reduce_type, *srcPtr, p.keep_dims, p.axes_for_reduction, dst_ref, out_dims); - if (p.reference.size()) { - for (int i = 0; i < p.reference.size(); i++) { - if (dst_ref.data()[i] != p.reference[i]) - FAIL() << "Wrong result with compare reference vector!"; - //std::cout << p.reference[i] << " " << dst_ref.data()[i] << std::endl; - } - } - - // Infer - srcs.insert(std::pair("input", src)); - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } - // Check results - if (out_dims.size() != p.out_shape.size()) - FAIL() << "Wrong out_shape size!"; - for (size_t i = 0; i < p.out_shape.size(); i++) { - if (out_dims[i] != p.out_shape[i]) - FAIL() << "Wrong out_shape dimensions!"; - } - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtReducesTests, TestsReduceSum) {} - -INSTANTIATE_TEST_CASE_P( - TestsReduceSum, MKLDNNCPUExtReducesTests, - ::testing::Values( - // Params: reduce_type, keep_dims, 
in_shape, inType, input_tensor, axes_for_reduction, out_shape, reference - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ 0 },{ 1, 3, 4 },{ 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ -3 },{ 1, 3, 4 },{ 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ 2 },{ 2, 3, 1 },{ 10, 26, 42, 58, 74, 90 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ -1 },{ 2, 3, 1 },{ 10, 26, 42, 58, 74, 90 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ 0, 2 },{ 1, 3, 1 },{ 68, 100, 132 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ 1, 2 },{ 2, 1, 1 },{ 78, 222 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ 2, 1 },{ 2, 1, 1 },{ 78, 222 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ 0, 1, 2 },{ 1, 1, 1 },{ 300 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ 0, -2, 2 },{ 1, 1, 1 },{ 300 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 2, 2, 2, 2, 2, 2 },"FP32",{},{ 0, 1, 2, 3, 4, 5, 6 },{ 1, 1, 1, 1, 1, 1, 1 },{ 8256 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 2, 2, 2, 2, 2, 2 },"FP32",{},{ 6, 3, 1, 4, 0 },{ 1, 1, 2, 1, 1, 2, 1 },{ 1776, 1840, 2288, 2352 } }, - reduce_test_params{ "ReduceSum", true,{ 2, 3, 4 },"FP32",{},{ 2, 2, 0, 2, 0 },{ 1, 3, 1 },{ 68, 100, 132 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ 0 },{ 3, 4 },{ 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ -3 },{ 3, 4 },{ 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ 2 },{ 2, 3 },{ 10, 26, 42, 58, 74, 90 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ -1 },{ 2, 3 },{ 10, 26, 42, 58, 74, 90 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ 0, 2 },{ 3 },{ 68, 100, 132 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ 1, 2 },{ 2 },{ 78, 222 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ 2, 1 },{ 2 },{ 78, 222 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ 0, 1, 2 },{},{ 300 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ 0, -2, 2 },{},{ 300 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 2, 2, 2, 2, 2, 2 },"FP32",{},{ 0, 1, 2, 3, 4, 5, 6 },{},{ 8256 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"FP32",{},{ 2, 2, 0, 2, 0 },{ 3 },{ 68, 100, 132 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 2, 2, 2, 2, 2, 2 },"FP32",{},{ 6, 3, 1, 4, 0 },{ 2, 2 },{ 1776, 1840, 2288, 2352 } }, - reduce_test_params{ "ReduceSum", true,{ 1, 2, 3, 4, 1 },"FP32",{},{ 1 },{ 1, 1, 3, 4, 1 },{ 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36 } }, - reduce_test_params{ "ReduceSum", false,{ 1, 2, 3, 4, 1 },"FP32",{},{ 1 },{ 1, 3, 4, 1 },{ 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36 } }, -// I32 tests - reduce_test_params{ "ReduceAnd", true,{ 2, 2, 2 },"I32",{1, 0, 1, 1, 0, 1, 1, 0},{ 2 },{ 2, 2, 1 },{ 0, 1, 0, 0} }, - reduce_test_params{ "ReduceL1", true, { 3, 2, 2 },"I32",{},{ 2 },{ 3, 2, 1 },{ 3, 7, 11, 15, 19, 23 } }, - reduce_test_params{ "ReduceL1", false, { 3, 2, 2 },"I32",{},{ 0, 1, 2 },{ },{ 78 } }, - reduce_test_params{ "ReduceL2", false,{ 3, 2, 2 },"I32",{},{ 2 },{ 3, 2 },{ 2, 5, 7, 10, 13, 16 } }, - reduce_test_params{ "ReduceL2", false,{ 3, 2, 2 },"I32",{},{ 0, 1, 2 },{ },{ 25 } }, 
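// [Editorial annotation, not part of the deleted file] How to read these entries: when the
// input_tensor vector is empty, SetUp() fills the source with 1..N via fill_data_dbgval()
// (data[i] = i + 1), so the reference vectors can be verified by hand. For example, for
// ReduceSum over axis 0 of a {2, 3, 4} tensor filled with 1..24, element i is summed with
// element i + 12, giving 14, 16, 18, ..., 36, which matches the first reference above.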
- reduce_test_params{ "ReduceLogSum", true,{ 10, 10, 2 },"I32",{},{ 2 },{ 10, 10, 1 },{} }, - reduce_test_params{ "ReduceLogSumExp", true,{ 5, 5, 2 },"I32",{},{ 2 },{ 5, 5, 1 },{} }, - reduce_test_params{ "ReduceMax", true,{ 3, 2, 2 },"I32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 1, 2 },{ 20, 2, 40, 2, 60, 2 } }, - reduce_test_params{ "ReduceMean", true, { 3, 2, 2 },"I32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 1, 2 },{ 12, 1, 35, 1, 57, 1 } }, - reduce_test_params{ "ReduceMin", false,{ 3, 2, 2 },"I32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 2 },{ 5, 1, 30, 1, 55, 1 } }, - reduce_test_params{ "ReduceOr", true,{ 2, 2, 2 },"I32",{1, 0, 1, 1, 0, 0, 1, 0},{ 2 },{ 2, 2, 1 },{1, 1, 0, 1 } }, - reduce_test_params{ "ReduceProd", true,{ 3, 2, 2 },"I32",{},{ 1 },{ 3, 1, 2 },{ 3, 8, 35, 48, 99, 120 } }, - reduce_test_params{ "ReduceSum", false,{ 2, 3, 4 },"I32",{},{ 2, 2, 0, 2, 0 },{ 3 },{ 68, 100, 132 } }, - reduce_test_params{ "ReduceSumSquare", true, { 3, 2, 2 },"I32",{},{ 1 },{ 3, 1, 2 },{ 10, 20, 74, 100, 202, 244 } }, - reduce_test_params{ "ReduceSumSquare", false, { 3, 2, 2 },"I32",{},{ 0, 1, 2 },{ },{ 650 } } -)); - - -TEST_P(MKLDNNCPUExtReducesTests, TestsReduceAll) {} - -INSTANTIATE_TEST_CASE_P( - TestsReduceAll, MKLDNNCPUExtReducesTests, - ::testing::Values( -// Params: reduce_type, keep_dims, in_shape, inType, input_tensor, axes_for_reduction, out_shape, reference - reduce_test_params{ "ReduceAnd", true,{ 2, 2, 2 },"FP32",{1, 0, 1, 1, 0, 1, 1, 0},{ 2 },{ 2, 2, 1 },{ 0, 1, 0, 0} }, - reduce_test_params{ "ReduceAnd", false, { 2, 2, 2 },"FP32",{1, 0, 1, 1, 0, 1, 1, 0},{ 0, 1, 2 },{ },{ 0 } }, - reduce_test_params{ "ReduceL1", true,{ 10, 10, 2 },"FP32",{},{ 2 },{ 10, 10, 1 },{ } }, - reduce_test_params{ "ReduceL1", true, { 3, 2, 2 },"FP32",{},{ 2 },{ 3, 2, 1 },{ 3, 7, 11, 15, 19, 23 } }, - reduce_test_params{ "ReduceL1", false, { 3, 2, 2 },"FP32",{},{ 2 },{ 3, 2 },{ 3, 7, 11, 15, 19, 23 } }, - reduce_test_params{ "ReduceL1", false, { 3, 2, 2 },"FP32",{},{ 0, 1, 2 },{ },{ 78 } }, - reduce_test_params{ "ReduceL2", true,{ 10, 10, 2 },"FP32",{},{ 2 },{ 10, 10, 1 },{} }, - reduce_test_params{ "ReduceL2", true,{ 3, 2, 2 },"FP32",{},{ 2 },{ 3, 2, 1 },{ 2.23606798f, 5.f, 7.81024968f, 10.63014581f, 13.45362405f, 16.2788206f } }, - reduce_test_params{ "ReduceL2", false,{ 3, 2, 2 },"FP32",{},{ 2 },{ 3, 2 },{ 2.23606798f, 5.f, 7.81024968f, 10.63014581f, 13.45362405f, 16.2788206f } }, - reduce_test_params{ "ReduceL2", false,{ 3, 2, 2 },"FP32",{},{ 0, 1, 2 },{ },{ 25.49509757f } }, - reduce_test_params{ "ReduceLogSum", true,{ 10, 10, 2 },"FP32",{},{ 2 },{ 10, 10, 1 },{} }, - reduce_test_params{ "ReduceLogSum", true,{ 3, 2, 2 },"FP32",{ },{ 1 },{ 3, 1, 2 },{ } }, - reduce_test_params{ "ReduceLogSum", false,{ 3, 2, 2 },"FP32",{ },{ 1 },{ 3, 2 },{ } }, - reduce_test_params{ "ReduceLogSum", false,{ 3, 2, 2 },"FP32",{ },{ 0, 1, 2 },{},{ } }, - reduce_test_params{ "ReduceLogSumExp", true,{ 5, 5, 2 },"FP32",{},{ 2 },{ 5, 5, 1 },{} }, - reduce_test_params{ "ReduceLogSumExp", true,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 1, 2 },{ 20.f, 2.31326175f, 40.00004578f, 2.31326175f, 60.00671387f, 2.31326175f } }, - reduce_test_params{ "ReduceLogSumExp", false,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 2 },{ 20.f, 2.31326175f, 40.00004578f, 2.31326175f, 60.00671387f, 2.31326175f } }, - reduce_test_params{ "ReduceLogSumExp", false,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 0, 1, 2 },{},{ 
60.00671387f } }, - reduce_test_params{ "ReduceMax", true,{ 10, 10, 2 },"FP32",{},{ 2 },{ 10, 10, 1 },{} }, - reduce_test_params{ "ReduceMax", true,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 1, 2 },{ 20, 2, 40, 2, 60, 2 } }, - reduce_test_params{ "ReduceMax", false,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 2 },{ 20, 2, 40, 2, 60, 2 } }, - reduce_test_params{ "ReduceMax", false,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 0, 1, 2 },{},{ 60 } }, - reduce_test_params{ "ReduceMean", true,{ 10, 10, 2 },"FP32",{},{ 2 },{ 10, 10, 1 },{} }, - reduce_test_params{ "ReduceMean", true, { 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 1, 2 },{ 12.5f, 1.5f, 35.f, 1.5f, 57.5f, 1.5f } }, - reduce_test_params{ "ReduceMean", false, { 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 2 },{ 12.5f, 1.5f, 35.f, 1.5f, 57.5f, 1.5f } }, - reduce_test_params{ "ReduceMean", false, { 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 0, 1, 2 },{ },{ 18.25f } }, - reduce_test_params{ "ReduceMin", true,{ 10, 10, 2 },"FP32",{},{ 2 },{ 10, 10, 1 },{} }, - reduce_test_params{ "ReduceMin", true,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 1, 2 },{ 5, 1, 30, 1, 55, 1 } }, - reduce_test_params{ "ReduceMin", false,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 1 },{ 3, 2 },{ 5, 1, 30, 1, 55, 1 } }, - reduce_test_params{ "ReduceMin", false,{ 3, 2, 2 },"FP32",{ 5, 1, 20, 2, 30, 1, 40, 2, 55, 1, 60, 2 },{ 0, 1, 2 },{},{ 1 } }, - reduce_test_params{ "ReduceOr", true,{ 2, 2, 2 },"FP32",{1, 0, 1, 1, 0, 0, 1, 0},{ 2 },{ 2, 2, 1 },{1, 1, 0, 1 } }, - reduce_test_params{ "ReduceOr", false, { 2, 2, 2 },"FP32",{},{ 0, 1, 2 },{ },{ 1 } }, - reduce_test_params{ "ReduceProd", true,{ 10, 10, 2 },"FP32",{},{ 2 },{ 10, 10, 1 },{} }, - reduce_test_params{ "ReduceProd", true,{ 3, 2, 2 },"FP32",{},{ 1 },{ 3, 1, 2 },{ 3, 8, 35, 48, 99, 120 } }, - reduce_test_params{ "ReduceProd", false,{ 3, 2, 2 },"FP32",{},{ 1 },{ 3, 2 },{ 3, 8, 35, 48, 99, 120 } }, - reduce_test_params{ "ReduceProd", false,{ 3, 2, 2 },"FP32",{},{ 0, 1, 2 },{ },{ 4.790016e+08 } }, - reduce_test_params{ "ReduceSumSquare", true,{ 10, 10, 2 },"FP32",{},{ 2 },{ 10, 10, 1 },{} }, - reduce_test_params{ "ReduceSumSquare", true, { 3, 2, 2 },"FP32",{},{ 1 },{ 3, 1, 2 },{ 10, 20, 74, 100, 202, 244 } }, - reduce_test_params{ "ReduceSumSquare", false, { 3, 2, 2 },"FP32",{},{ 1 },{ 3, 2 },{ 10, 20, 74, 100, 202, 244 } }, - reduce_test_params{ "ReduceSumSquare", false, { 3, 2, 2 },"FP32",{},{ 0, 1, 2 },{ },{ 650 } } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp deleted file mode 100644 index c31ba958bba..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct reverse_sequence_test_params { - std::string inIdxPrecision; - InferenceEngine::SizeVector in_out_shape; - std::vector seq_lengths; - int seq_axis; - int batch_axis; - std::vector reference; - - 
std::vector> comp; -}; - -template -void ref_reverse_sequence( - InferenceEngine::TBlob &src, - InferenceEngine::TBlob &seq_lengths, - InferenceEngine::TBlob &dst, - int seq_axis, - int batch_axis -) { - size_t i, src_idx; - const float *src_data = src.data(); - InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims(); - InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides(); - const data_t *seq_lengths_data = seq_lengths.data(); - InferenceEngine::SizeVector seq_lengths_dims = seq_lengths.getTensorDesc().getDims(); - float* dst_data = dst.data(); - - if (seq_axis < 0) - seq_axis += src_dims.size(); - - if (seq_axis < 0 || seq_axis >= src_dims.size()) - FAIL() << "Incorrect 'seq_axis' parameters dimensions and axis number!"; - - if (batch_axis < 0) - batch_axis += src_dims.size(); - - if (batch_axis < 0 || batch_axis >= src_dims.size()) - FAIL() << "Incorrect 'batch_axis' parameters dimensions and axis number!"; - - for (i = 0; i < src_dims[batch_axis]; i++) { - if (static_cast(seq_lengths_data[i]) > src_dims[seq_axis]) - FAIL() << "Incorrect input 'seq_lengths' values!"; - } - - size_t work_amount_dst = srcStrides[0] * src_dims[0]; - InferenceEngine::SizeVector counters(src_dims.size(), 0); - for (size_t iwork = 0; iwork < work_amount_dst; ++iwork) { - for (i = 0, src_idx = 0; i < src_dims.size(); ++i) { - size_t idx = counters[i]; - if (i == seq_axis && idx < static_cast(seq_lengths_data[counters[batch_axis]])) { - idx = static_cast(seq_lengths_data[counters[batch_axis]]) - idx - 1; - } - src_idx += idx * srcStrides[i]; - } - - dst_data[iwork] = src_data[src_idx]; - - for (int j = src_dims.size() - 1; j >= 0; j--) { - counters[j] = (counters[j] + 1) % src_dims[j]; - if (counters[j] != 0) break; - } - } -} - -class MKLDNNCPUExtReverseSequenceTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_OUT_ - - - - - - - _DIM_SIZE_ - - - - - - - - _IN_OUT_ - - - _DIM_SIZE_ - - - - - _IN_OUT_ - - - - - - - - - -)V0G0N"; - - std::string getModel(reverse_sequence_test_params p) { - std::string model = model_t; - std::string in_out_shape; - for (size_t i = 0; i < p.in_out_shape.size(); i++) { - in_out_shape += ""; - in_out_shape += std::to_string(p.in_out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_IIDXP_", p.inIdxPrecision); - REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape); - REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.seq_lengths.size()); - REPLACE_WITH_NUM(model, "_SA_", p.seq_axis); - REPLACE_WITH_NUM(model, "_BA_", p.batch_axis); - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - reverse_sequence_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - // Input Data - InferenceEngine::Blob::Ptr src; - src = 
InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.in_out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_out_shape) }); - src->allocate(); - fill_data_dbgval(src->buffer(), src->size()); - auto * srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", src)); - - InferenceEngine::Blob::Ptr seq_lengthsIdx; - InferenceEngine::SizeVector seq_lengths_dim(1, p.seq_lengths.size()); - if (p.inIdxPrecision == "I32") { - seq_lengthsIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) }); - seq_lengthsIdx->allocate(); - if (p.seq_lengths.size()) - memcpy(static_cast(seq_lengthsIdx->buffer()), &p.seq_lengths[0], sizeof(int32_t)*p.seq_lengths.size()); - auto * seq_lengthsIdxPtr = dynamic_cast*>(seq_lengthsIdx.get()); - if (seq_lengthsIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - ref_reverse_sequence(*srcPtr, *seq_lengthsIdxPtr, dst_ref, p.seq_axis, p.batch_axis); - if (p.reference.size()) { - if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0) - FAIL() << "Wrong result with compare TF reference!"; - } - srcs.insert(std::pair("seq_lengths", seq_lengthsIdx)); - } else if (p.inIdxPrecision == "FP32") { - seq_lengthsIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) }); - seq_lengthsIdx->allocate(); - if (p.seq_lengths.size()) - for (size_t i = 0; i < p.seq_lengths.size(); i++) { - static_cast(seq_lengthsIdx->buffer())[i] = static_cast(p.seq_lengths[i]); - } - auto * seq_lengthsIdxPtr = dynamic_cast*>(seq_lengthsIdx.get()); - if (seq_lengthsIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - ref_reverse_sequence(*srcPtr, *seq_lengthsIdxPtr, dst_ref, p.seq_axis, p.batch_axis); - if (p.reference.size()) { - if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0) - FAIL() << "Wrong result with compare TF reference!"; - } - srcs.insert(std::pair("seq_lengths", seq_lengthsIdx)); - } else { - return; - } - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -// Test data vectors -static std::vector test0 = { 9.f,10.f,11.f,12.f,13.f,14.f,15.f,16.f,17.f,0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f,18.f,19.f,20.f,21.f,22.f,23.f,24.f,25.f,26.f }; -static std::vector test2 = { 3.f,4.f,5.f,0.f,1.f,2.f,6.f,7.f,8.f,12.f,13.f,14.f,9.f,10.f,11.f,15.f,16.f,17.f,21.f,22.f,23.f,18.f,19.f,20.f,24.f,25.f,26.f }; -static std::vector test4 = { 1.f,0.f,2.f,4.f,3.f,5.f,7.f,6.f,8.f,10.f,9.f,11.f,13.f,12.f,14.f,16.f,15.f,17.f,19.f,18.f,20.f,22.f,21.f,23.f,25.f,24.f,26.f }; -static std::vector test7 = { 0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f,12.f,13.f,14.f,9.f,10.f,11.f,15.f,16.f,17.f,24.f,25.f,26.f,21.f,22.f,23.f,18.f,19.f,20.f }; -static std::vector test8 = { 0.f,4.f,8.f,3.f,1.f,5.f,6.f,7.f,2.f,9.f,13.f,17.f,12.f,10.f,14.f,15.f,16.f,11.f,18.f,22.f,26.f,21.f,19.f,23.f,24.f,25.f,20.f }; - -TEST_P(MKLDNNCPUExtReverseSequenceTests, TestsReverseSequence) {} -INSTANTIATE_TEST_CASE_P( - TestsReverseSequence, MKLDNNCPUExtReverseSequenceTests, - ::testing::Values( -// Params: in_out_shape, seq_lengths, seq_axis, batch_axis, reference -/* 0 */ 
reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, 0, 0, test0 }, - reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, -3, 0, test0 }, - reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, 1, 0, test2 }, - reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, -2, 0, test2 }, - reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, 2, 1, test4 }, -/* 5 */ reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, -1, 1, test4 }, - reverse_sequence_test_params{ "I32", { 2, 3 },{ 3, 2 }, 1, 0, {2,1,0,4,3,5} }, - reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 1, 2, 3 }, 1, 0, test7 }, - reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 1, 2, 3 }, 1,-3, test7 }, - reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 1, 2, 3 }, 1, 2, test8 }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, 0, 0, test0 }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, -3, 0, test0 }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, 1, 0, test2 }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, -2, 0, test2 }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, 2, 1, test4 }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, -1, 1, test4 }, -/* 15 */ reverse_sequence_test_params{"FP32", { 2, 3 },{ 3, 2 }, 1, 0, {2,1,0,4,3,5} }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 1, 2, 3 }, 1, 0, test7 }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 1, 2, 3 }, 1,-3, test7 }, - reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 1, 2, 3 }, 1, 2, test8 }, - reverse_sequence_test_params{"FP32", { 2, 2, 2, 3 },{ 1, 2 }, 3, 0, {0,1,2,3,4,5,6,7,8,9,10,11,13,12,14,16,15,17,19,18,20,22,21,23} }, - reverse_sequence_test_params{"FP32", { 2, 2, 2, 3 },{ 2, 2 }, 2, 0, {3,4,5,0,1,2,9,10,11,6,7,8,15,16,17,12,13,14,21,22,23,18,19,20} } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp deleted file mode 100644 index ef4644a4dcb..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct scatterTF_test_params { - std::string inIdxPrecision; - InferenceEngine::SizeVector inDataDim; - std::vector inData; - InferenceEngine::SizeVector inIdxDim; - std::vector inIdx; - std::vector inUpd; - int axis; - - std::vector reference; - - std::vector> comp; -}; - -class MKLDNNCPUExtScatterTFTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IDATA_ - - - - - - - _IIDX_ - - - - - - - _IIDX_ - - - - - - - - _IDATA_ - - - _IIDX_ - - - _IIDX_ - - - - - _IDATA_ - - - - - - - - - - -)V0G0N"; - - std::string getModel(scatterTF_test_params p) { - std::string model = model_t; - std::string inIdx; - std::string inData; - - for (auto& idx : p.inIdxDim) { - inIdx += ""; - inIdx += std::to_string(idx) + "\n"; - } - - for (auto& dct : p.inDataDim) { - inData += ""; - inData += std::to_string(dct) + "\n"; - } - - REPLACE_WITH_STR(model, "_IIDX_", inIdx); - REPLACE_WITH_STR(model, "_IIDXP_", p.inIdxPrecision); - REPLACE_WITH_STR(model, 
"_IDATA_", inData); - REPLACE_WITH_NUM(model, "_AX_", p.axis); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - scatterTF_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - //std::cout << model << std::endl; - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Input Data - InferenceEngine::Blob::Ptr srcData = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.inDataDim, InferenceEngine::TensorDesc::getLayoutByDims(p.inDataDim) }); - srcData->allocate(); - memcpy(srcData->buffer(), &p.inData[0], sizeof(float)*p.inData.size()); - auto * srcDataPtr = dynamic_cast*>(srcData.get()); - if (srcDataPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Input Indexes - InferenceEngine::Blob::Ptr srcIdx; - if (p.inIdxPrecision == "I32") { - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, p.inIdxDim, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdxDim) }); - srcIdx->allocate(); - memcpy(static_cast(srcIdx->buffer()), &p.inIdx[0], sizeof(int32_t)*p.inIdx.size()); - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - } else { - srcIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.inIdxDim, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdxDim) }); - srcIdx->allocate(); - for (size_t i = 0; i < p.inIdx.size(); i++) { - static_cast(srcIdx->buffer())[i] = static_cast(p.inIdx[i]); - } - auto * srcIdxPtr = dynamic_cast*>(srcIdx.get()); - if (srcIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - } - - // Input Updates - InferenceEngine::Blob::Ptr srcUpd; - srcUpd = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.inIdxDim, InferenceEngine::TensorDesc::getLayoutByDims(p.inIdxDim) }); - srcUpd->allocate(); - memcpy(static_cast(srcUpd->buffer()), &p.inUpd[0], sizeof(float)*p.inUpd.size()); - auto * srcUpdPtr = dynamic_cast*>(srcUpd.get()); - if (srcUpdPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Infer - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("InputData", srcData)); - srcs.insert(std::pair("InputIndexes", srcIdx)); - srcs.insert(std::pair("InputUpdates", srcUpd)); - graph.Infer(srcs, outputBlobs); - - // Check results - if (memcmp((*output).data(), &p.reference[0], output->byteSize()) != 0) - FAIL() << "Wrong result with compare TF reference!"; - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -// Disabled these tests as they need to adjust with new specs: -// - new Scatter Update layer: like TF scatter_update -// - new Scatter Elements Update: like ONNX Scatter Elements -// See merge requests: -// DLDT #6005: Specification for the ScatterElementsUpdate layer -// DLDT #6091: Specification for ScatterUpdate operation -TEST_P(MKLDNNCPUExtScatterTFTests, DISABLED_TestsScatter) {} - -INSTANTIATE_TEST_CASE_P( - TestsScatter, MKLDNNCPUExtScatterTFTests, - 
::testing::Values( -// Params: inDataDim, inData, inIdxDim, inIdx, inUpd, axis, reference - scatterTF_test_params{ "I32", { 3,3 },{ 0,0,0,0,0,0,0,0,0 },{ 2,3 },{ 1,0,2,0,2,1 },{ 1.,1.1,1.2,2,2.1,2.2 }, 0,{ 2,1.1,0,1,0,2.2,0,2.1,1.2 }}, - scatterTF_test_params{ "I32", { 3,3 },{ 0,0,0,0,0,0,0,0,0 },{ 2,3 },{ 1,0,2,0,2,1 },{ 1.,1.1,1.2,2,2.1,2.2 }, 1,{ 1.1,1,1.2,2,2.2,2.1,0,0,0 }}, - scatterTF_test_params{ "I32", { 1,5 },{ 1,2,3,4,5 },{ 1,2 },{ 1,3 },{ 1.1,2.1 }, 1,{ 1,1.1,3,2.1,5 }}, - scatterTF_test_params{"FP32", { 3,3 },{ 0,0,0,0,0,0,0,0,0 },{ 2,3 },{ 1,0,2,0,2,1 },{ 1.,1.1,1.2,2,2.1,2.2 }, 0,{ 2,1.1,0,1,0,2.2,0,2.1,1.2 }}, - scatterTF_test_params{"FP32", { 3,3 },{ 0,0,0,0,0,0,0,0,0 },{ 2,3 },{ 1,0,2,0,2,1 },{ 1.,1.1,1.2,2,2.1,2.2 }, 1,{ 1.1,1,1.2,2,2.2,2.1,0,0,0 }}, - scatterTF_test_params{"FP32", { 1,5 },{ 1,2,3,4,5 },{ 1,2 },{ 1,3 },{ 1.1,2.1 }, 1,{ 1,1.1,3,2.1,5 }})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/select_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/select_tests.cpp deleted file mode 100644 index 8e5b2fcbe85..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/select_tests.cpp +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; -using namespace InferenceEngine; - -using select_test_params = std::tuple< - InferenceEngine::Precision, // conditionType - InferenceEngine::SizeVector, // conditionShape - InferenceEngine::SizeVector // inputShape ->; - -template -void ref_select( - InferenceEngine::TBlob &condition, - InferenceEngine::TBlob &then_, - InferenceEngine::TBlob &else_, - InferenceEngine::TBlob &dst -) { - const T *conditionData = condition.buffer(); - - const float *thenData = then_.cbuffer().as(); - - const float *elseData = else_.cbuffer().as(); - - float* dstData = dst.cbuffer().as(); - enum {N, C, H, W, Dims}; - int dim[Dims] = {1, 1, 1, 1}; - int cdim[Dims] = {1, 1, 1, 1}; - - InferenceEngine::SizeVector dims = then_.getTensorDesc().getDims(); - std::copy(std::begin(dims), std::end(dims), std::begin(dim) + (Dims - dims.size())); - - InferenceEngine::SizeVector cDims = condition.getTensorDesc().getDims(); - std::copy(std::begin(cDims), std::end(cDims), std::begin(cdim) + (Dims - cDims.size())); - - for (int b = 0; b < dim[N]; b++) - for (int c = 0; c < dim[C]; c++) - for (int h = 0; h < dim[H]; h++) - for (int w = 0; w < dim[W]; w++) { - dstData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w] - = conditionData[(b % cdim[N])*cdim[C]*cdim[H]*cdim[W] + (c % cdim[C])*cdim[H]*cdim[W] + (h % cdim[H])*cdim[W] + (w % cdim[W])] - ? 
thenData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w] - : elseData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w]; - } -} - -class MKLDNNCPUExtSelectTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _CONDITION_SHAPE_ - - - - - - - _INPUT_SHAPE_ - - - - - - - _INPUT_SHAPE_ - - - - - - - _CONDITION_SHAPE_ - - - _INPUT_SHAPE_ - - - _INPUT_SHAPE_ - - - - - _INPUT_SHAPE_ - - - - - - - - - - -)V0G0N"; - - std::string getModel(InferenceEngine::Precision conditionType, - InferenceEngine::SizeVector conditionShape, - InferenceEngine::SizeVector inputShape) { - std::string model = model_t; - - { - std::string conditionTypeStr; - switch(conditionType) { - case InferenceEngine::Precision::FP32 : conditionTypeStr = "FP32"; break; - case InferenceEngine::Precision::I32 : conditionTypeStr = "I32" ; break; - default: EXPECT_FALSE("Unsuported pressision"); - } - REPLACE_WITH_STR(model, "_CONDITION_TYPE_", conditionTypeStr); - } - - { - std::string conditionShapeStr; - for (auto dim : conditionShape) { - conditionShapeStr += ""; - conditionShapeStr += std::to_string(dim) + "\n"; - } - conditionShapeStr.pop_back(); - REPLACE_WITH_STR(model, "_CONDITION_SHAPE_", conditionShapeStr); - } - - { - std::string inputShapeStr; - for (auto dim : inputShape) { - inputShapeStr += ""; - inputShapeStr += std::to_string(dim) + "\n"; - } - inputShapeStr.pop_back(); - REPLACE_WITH_STR(model, "_INPUT_SHAPE_", inputShapeStr); - } - - return model; - } - - static void fill_even(int32_t *data, size_t size) { - for (size_t i = 0; i < size; i++) - data[i] = i%2 ? 1 : 0; - } - - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - Precision conditionType; - SizeVector conditionShape; - SizeVector inputShape; - std::tie(conditionType, conditionShape, inputShape) = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(conditionType, conditionShape, inputShape); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - // Input Data - InferenceEngine::Blob::Ptr then_; - then_ = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - inputShape, InferenceEngine::TensorDesc::getLayoutByDims(inputShape) }); - then_->allocate(); - fill_data_dbgval(then_->buffer(), then_->size()); - auto * thenPtr = dynamic_cast*>(then_.get()); - if (thenPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Input Data - InferenceEngine::Blob::Ptr else_; - else_ = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - inputShape, InferenceEngine::TensorDesc::getLayoutByDims(inputShape) }); - else_->allocate(); - fill_data_dbgval(else_->buffer(), else_->size(), -1.0); - auto * elsePtr = dynamic_cast*>(else_.get()); - if (elsePtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::Blob::Ptr condition; - - switch (conditionType) { - case 
Precision::FP32 : - condition = make_shared_blob({ conditionType, conditionShape, - TensorDesc::getLayoutByDims(conditionShape) }); - condition->allocate(); - fill_data(condition->buffer(), condition->size()); - - break; - case Precision::I32 : - condition = make_shared_blob({ conditionType, conditionShape, - TensorDesc::getLayoutByDims(conditionShape) }); - break; - default: - FAIL(); - break; - } - - condition->allocate(); - fill_even(condition->buffer(), condition->size()); - - switch (conditionType) { - case InferenceEngine::Precision::FP32 : { - auto conditionPtr = std::dynamic_pointer_cast>(condition); - if (conditionPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - ref_select(*conditionPtr, *thenPtr, *elsePtr, dst_ref); - } - break; - case InferenceEngine::Precision::I32 : { - auto conditionPtr = std::dynamic_pointer_cast>(condition); - if (conditionPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - ref_select(*conditionPtr, *thenPtr, *elsePtr, dst_ref); - } - break; - default: - FAIL(); - } - - InferenceEngine::BlobMap srcs = { - {"condition", condition}, - {"then_", then_}, - {"else_", else_}, - }; - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtSelectTests, TestsSelect) {} -INSTANTIATE_TEST_CASE_P( - TestsSelect, MKLDNNCPUExtSelectTests, - Combine( - Values(Precision::I32), - Values( -// SizeVector {}, // TODO: scalars is not supported right now for CPU backend - SizeVector {1}, - SizeVector {1, 1}, - SizeVector {1, 16}, - SizeVector {3, 1, 16}, - SizeVector {1, 16, 1}, - SizeVector {3, 16, 16}), - Values(SizeVector {3, 16, 16}) - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp deleted file mode 100644 index 8a4c12bd422..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct shuffle_channels_test_params { - InferenceEngine::SizeVector in_out_shape; - int axis; - int group; - - std::vector reference; - std::vector> comp; -}; - -void ref_shuffle_channels( - InferenceEngine::TBlob &src, - InferenceEngine::TBlob &dst, - int axis, - int group -) { - size_t i; - const float *src_data = src.data(); - InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims(); - InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides(); - float* dst_data = dst.data(); - InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims(); - InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides(); - - if (axis < 0) - axis += dst_dims.size(); - - if (axis < 0 || axis >= dst_dims.size()) - FAIL() << "Incorrect input parameters dimensions and axis number!"; - - if (dst_dims[axis] % group) - FAIL() << "Group parameter must evenly divide the channel dimension!"; - - // Find number of dictionaries, index range and data length - size_t numDictionaries = 1; - for (i = 0; i <= axis; i++) - numDictionaries *= dst_dims[i]; - - 
size_t channelsNum = dst_dims[axis] / group; - - size_t dataLength = 1; - for (i = axis + 1; i < dst_dims.size(); i++) - dataLength *= dst_dims[i]; - - if (dataLength == 0) - FAIL() << "Incorrect input parameters dimension!"; - - size_t j, k; - for (j = 0, k = 0; j < numDictionaries; j += dst_dims[axis]) { - for (i = 0; i < (dst_dims[axis] * channelsNum); i += channelsNum, k += dataLength) { - int idx = j + i / dst_dims[axis] + i % dst_dims[axis]; - memcpy(&dst_data[k], &src_data[dataLength * idx], sizeof(float) * dataLength); - } - } -} - -class MKLDNNCPUExtShuffleChannelsTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_OUT_ - - - - - - - - _IN_OUT_ - - - - - _IN_OUT_ - - - - - - - - -)V0G0N"; - - std::string getModel(shuffle_channels_test_params p) { - std::string model = model_t; - std::string in_out_shape; - - for (size_t i = 0; i < p.in_out_shape.size(); i++) { - in_out_shape += ""; - in_out_shape += std::to_string(p.in_out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape); - REPLACE_WITH_NUM(model, "_AX_", p.axis); - REPLACE_WITH_NUM(model, "_GR_", p.group); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - shuffle_channels_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - // Input Data - InferenceEngine::Blob::Ptr src; - src = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.in_out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_out_shape) }); - src->allocate(); - fill_data_dbgval(src->buffer(), src->size()); - auto * srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - InferenceEngine::SizeVector out_dims; - ref_shuffle_channels(*srcPtr, dst_ref, p.axis, p.group); - - // Check results - if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0) - FAIL() << "Wrong result with compare TF reference!"; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", src)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - - -TEST_P(MKLDNNCPUExtShuffleChannelsTests, TestsShuffleChannels) {} - -// Test data vectors -static std::vector test0 = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f, - 4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f, - 8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f }; -static std::vector test4 = { 0.f, 2.f, 4.f, 1.f, 3.f, 5.f, 6.f, 8.f, 10.f, 7.f, 9.f, 11.f, 
12.f, 14.f, 16.f, 13.f, 15.f, 17.f, 18.f, 20.f, 22.f, 19.f, 21.f, 23.f }; -static std::vector test5 = { 0.f, 1.f, 4.f, 5.f, 8.f, 9.f, 2.f, 3.f, 6.f, 7.f, 10.f, 11.f, 12.f, 13.f, 16.f, 17.f, 20.f, 21.f, 14.f, 15.f, 18.f, 19.f, 22.f, 23.f }; -static std::vector test6 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f, 6.f, 9.f, 7.f, 10.f, 8.f, 11.f, 12.f, 15.f, 13.f, 16.f, 14.f, 17.f, 18.f, 21.f, 19.f, 22.f, 20.f, 23.f }; -static std::vector test7 = { 0.f, 1.f, 6.f, 7.f, 2.f, 3.f, 8.f, 9.f, 4.f, 5.f, 10.f, 11.f, 12.f, 13.f, 18.f, 19.f, 14.f, 15.f, 20.f, 21.f, 16.f, 17.f, 22.f, 23.f }; -static std::vector test8 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f }; - -INSTANTIATE_TEST_CASE_P( - TestsShuffleChannels, MKLDNNCPUExtShuffleChannelsTests, - ::testing::Values( -// Params: in_out_shape, axis, group, reference -/* 0 */ shuffle_channels_test_params{ { 1, 15, 2, 2 }, 1, 5, test0 }, - shuffle_channels_test_params{ { 1, 15, 2, 2 }, -3, 5, test0 }, - shuffle_channels_test_params{ { 15, 2, 2 }, 0, 5, test0 }, - shuffle_channels_test_params{ { 15, 2, 2 }, -3, 5, test0 }, - shuffle_channels_test_params{ { 2, 2, 6 }, -1, 3, test4 }, -/* 5 */ shuffle_channels_test_params{ { 2, 6, 2 }, -2, 3, test5 }, - shuffle_channels_test_params{ { 2, 2, 6 }, -1, 2, test6 }, - shuffle_channels_test_params{ { 2, 6, 2 }, -2, 2, test7 }, - shuffle_channels_test_params{ { 6 }, 0, 2, test8 } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp deleted file mode 100644 index a1cb52dd793..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp +++ /dev/null @@ -1,545 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -#include -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct sparse_fill_empty_rows_test_params { - std::string precision; - InferenceEngine::SizeVector input_indices_shape; - std::vector input_indices_value; - - InferenceEngine::SizeVector input_values_shape; - - InferenceEngine::SizeVector input_dense_shape_shape; - std::vector input_dense_shape_value; - - InferenceEngine::SizeVector input_default_value_shape; - std::vector input_default_value_value; - - InferenceEngine::SizeVector output_indices_shape; - InferenceEngine::SizeVector output_values_shape; - InferenceEngine::SizeVector output_empty_rows_indicator_shape; - - size_t num_prim_desc; - int selectedType; - - std::vector> comp; -}; - -void ref_sparse_fill_empty_rows(InferenceEngine::TBlob &input_indices, - InferenceEngine::TBlob &input_values, - InferenceEngine::TBlob &dense_shape, - InferenceEngine::TBlob &default_value, - InferenceEngine::TBlob &output_indices, - InferenceEngine::TBlob &output_values, - InferenceEngine::TBlob &output_empty_rows_indicator) { - const float *input_indices_ptr = input_indices.data(); - const float *input_values_ptr = input_values.data(); - const float *dense_shape_ptr = dense_shape.data(); - const float *default_value_ptr = default_value.data(); - float dflt_value = default_value_ptr[0]; - - float num_rows = dense_shape_ptr[0]; - float num_cols = dense_shape_ptr[1]; - - std::vector dims = input_values.getTensorDesc().getDims(); - size_t inMaxNumValues = dims[0]; - std::vector 
out_dims = output_values.getTensorDesc().getDims(); - size_t outMaxNumValues = out_dims[0]; - - // compute actual number of values by searching out of range indice that serves as a marker - size_t in_actual_num_values = 0; - for (in_actual_num_values = 0; in_actual_num_values < inMaxNumValues; in_actual_num_values++) { - float indice_x = input_indices_ptr[2 * in_actual_num_values]; - float indice_y = input_indices_ptr[2 * in_actual_num_values + 1]; - if (indice_x < 0 || indice_y < 0 || indice_x >= num_rows || indice_y >= num_cols) break; - } - - // create auxiliary container for sorting - std::vector> indices_values(in_actual_num_values); // - for (size_t i = 0; i < in_actual_num_values; i++) { - float row = input_indices_ptr[2 * i]; - float col = input_indices_ptr[2 * i + 1]; - float value = input_values_ptr[i]; - std::array elem = { row, col, value }; - indices_values[i] = elem; - } - - // sort values by row - std::sort(indices_values.begin(), indices_values.end(), - [](const std::array& first, const std::array& second) { - return first[0] < second[0]; - }); - - // unsplit indices and values - std::vector indices_with_sorted_rows; - std::vector values_for_sorted_rows; - for (auto const & elem : indices_values) { - indices_with_sorted_rows.push_back(elem[0]); - indices_with_sorted_rows.push_back(elem[1]); - values_for_sorted_rows.push_back(elem[2]); - } - - // compute start indice for each row and a number of values at each row - std::vector values_at_row(num_rows); - std::fill(values_at_row.begin(), values_at_row.end(), 0); - float prev_row_with_value = -1.0; - unsigned int total_num_values = 0; - std::vector>::iterator curr_it, prev_it; - for (float row_ind = 0.0; row_ind < num_rows; row_ind = row_ind + 1.0) { - curr_it = std::find_if(indices_values.begin(), indices_values.end(), - [row_ind](std::array elem) { return elem[0] == row_ind; }); - if (curr_it != indices_values.end()) { - if (prev_row_with_value != -1.0) { - unsigned int num_values_at_prev_row = std::distance(prev_it, curr_it); - values_at_row[(int)prev_row_with_value] = num_values_at_prev_row; - total_num_values += num_values_at_prev_row; - } - prev_row_with_value = row_ind; - prev_it = curr_it; - } - else { - total_num_values++; - } - } - if (prev_row_with_value != -1.0) { - unsigned int num_values_at_prev_row = std::distance(prev_it, indices_values.end()); - values_at_row[(int)prev_row_with_value] = num_values_at_prev_row; - total_num_values += num_values_at_prev_row; - } - - // create output indices - float *output_indices_ptr = output_indices.data(); - float *output_values_ptr = output_values.data(); - float *output_empty_rows_indicator_ptr = output_empty_rows_indicator.data(); - - // zero output buffers - std::memset(output_indices_ptr, 0, outMaxNumValues * 2 * sizeof(float)); - std::memset(output_values_ptr, 0, outMaxNumValues * sizeof(float)); - std::memset(output_empty_rows_indicator_ptr, 0, num_rows * sizeof(float)); - - unsigned int curr_pos_from_copy = 0; - unsigned int curr_pos_to_copy = 0; - for (int row_ind = 0; row_ind < (int)num_rows; row_ind++) { - unsigned int num_values_at_row = values_at_row[row_ind]; - if (num_values_at_row == 0) { - output_empty_rows_indicator_ptr[row_ind] = 1.0; - output_values_ptr[curr_pos_to_copy] = dflt_value; - output_indices_ptr[curr_pos_to_copy * 2] = (float)row_ind; - output_indices_ptr[curr_pos_to_copy * 2 + 1] = 0.0; - curr_pos_to_copy++; - } - else { - output_empty_rows_indicator_ptr[row_ind] = 0.0; - std::copy(values_for_sorted_rows.begin() + curr_pos_from_copy, - 
values_for_sorted_rows.begin() + curr_pos_from_copy + num_values_at_row, - output_values_ptr + curr_pos_to_copy); - std::copy(indices_with_sorted_rows.begin() + 2 * curr_pos_from_copy, - indices_with_sorted_rows.begin() + 2 * curr_pos_from_copy + 2 * num_values_at_row, output_indices_ptr + 2 * curr_pos_to_copy); - curr_pos_to_copy += num_values_at_row; - curr_pos_from_copy += num_values_at_row; - } - } - - // mark the end of output using (-1, -1) indice - if (total_num_values < outMaxNumValues) { - output_indices_ptr[total_num_values * 2] = -1.0; - output_indices_ptr[total_num_values * 2 + 1] = -1.0; - } -} - -class MKLDNNCPUExtSparseFillEmptyRowsTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IIN_ - - - - - - - _IVL_ - - - - - - - _IDS_ - - - - - - - _IDV_ - - - - - - - _IIN_ - - - _IVL_ - - - _IDS_ - - - _IDV_ - - - - - _OIN_ - - - _OVL_ - - - _ERI_ - - - - - - - - - - - -)V0G0N"; - - std::string getModel(sparse_fill_empty_rows_test_params p) { - std::string model = model_t; - std::string input_indices; - std::string input_values; - std::string dense_shape; - std::string default_value; - std::string output_indices; - std::string output_values; - std::string output_empty_rows_indicator; - - InferenceEngine::SizeVector input_dense_shape_shape = { 2 }; - - for (auto& shape : p.input_indices_shape) { - input_indices += ""; - input_indices += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.input_values_shape) { - input_values += ""; - input_values += std::to_string(shape) + "\n"; - } - - for (auto& shape : input_dense_shape_shape) { - dense_shape += ""; - dense_shape += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.input_default_value_shape) { - default_value += ""; - default_value += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.output_indices_shape) { - output_indices += ""; - output_indices += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.output_values_shape) { - output_values += ""; - output_values += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.output_empty_rows_indicator_shape) { - output_empty_rows_indicator += ""; - output_empty_rows_indicator += std::to_string(shape) + "\n"; - } - - REPLACE_WITH_STR(model, "_IIN_", input_indices); - REPLACE_WITH_STR(model, "_IVL_", input_values); - REPLACE_WITH_STR(model, "_IDS_", dense_shape); - REPLACE_WITH_STR(model, "_IDV_", default_value); - REPLACE_WITH_STR(model, "_OIN_", output_indices); - REPLACE_WITH_STR(model, "_OVL_", output_values); - REPLACE_WITH_STR(model, "_ERI_", output_empty_rows_indicator); - - return model; - } - - template - static void fill_data_dbgval(data_t *data, size_t size) { - for (size_t i = 0; i < size; i++) { - data[i] = static_cast(i & (sizeof(data_t) * 8 - 1)); - } - } -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - sparse_fill_empty_rows_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "SparseFillEmptyRows") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - 
p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - // 4 inputs + 1 op + 3 outputs - ASSERT_EQ(8, nodes.size()); - - // Input Data - InferenceEngine::Blob::Ptr input_indices = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_indices_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_indices_shape) }); - input_indices->allocate(); - auto *input_indices_ptr = dynamic_cast*>(input_indices.get()); - std::copy(p.input_indices_value.begin(), p.input_indices_value.end(), (float *) input_indices_ptr->data()); - - InferenceEngine::Blob::Ptr input_values = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_values_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_values_shape) }); - input_values->allocate(); - fill_data(input_values->buffer(), input_values->size()); - - auto *input_values_ptr = dynamic_cast*>(input_values.get()); - InferenceEngine::Blob::Ptr input_dense_shape = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_dense_shape_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_dense_shape_shape) }); - input_dense_shape->allocate(); - auto *input_dense_shape_ptr = dynamic_cast*>(input_dense_shape.get()); - std::copy(p.input_dense_shape_value.begin(), p.input_dense_shape_value.end(), (float *) input_dense_shape_ptr->data()); - - InferenceEngine::Blob::Ptr input_default_value = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_default_value_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_default_value_shape) }); - input_default_value->allocate(); - auto *input_default_value_ptr = dynamic_cast*>(input_default_value.get()); - std::copy(p.input_default_value_value.begin(), p.input_default_value_value.end(), (float *) input_default_value_ptr->data()); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap output_blobs; - auto iter = out.begin(); - - std::pair item = *(iter++); - InferenceEngine::Blob::Ptr output_indices = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output_indices->allocate(); - output_blobs[item.first] = output_indices; - InferenceEngine::TBlob output_indices_ref(item.second->getTensorDesc()); - output_indices_ref.allocate(); - - item = *(iter++); - InferenceEngine::Blob::Ptr output_values = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output_values->allocate(); - output_blobs[item.first] = output_values; - InferenceEngine::TBlob output_values_ref(item.second->getTensorDesc()); - output_values_ref.allocate(); - - item = *(iter++); - InferenceEngine::Blob::Ptr output_empty_rows_indicator = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output_empty_rows_indicator->allocate(); - output_blobs[item.first] = output_empty_rows_indicator; - InferenceEngine::TBlob output_empty_rows_indicator_ref(item.second->getTensorDesc()); - output_empty_rows_indicator_ref.allocate(); - - // Compute reference result - ref_sparse_fill_empty_rows(*input_indices_ptr, *input_values_ptr, *input_dense_shape_ptr, *input_default_value_ptr, - output_indices_ref, output_values_ref, output_empty_rows_indicator_ref); - - // Compute IE result - InferenceEngine::BlobMap inputs; - inputs.insert(std::pair("InputIndices", input_indices)); 
- inputs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputValues", input_values));
- inputs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputDenseShape", input_dense_shape));
- inputs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("InputDefaultValue", input_default_value));
-
- // Check the result
- graph.Infer(inputs, output_blobs);
- compare(*output_indices, output_indices_ref, 0.0f);
- compare(*output_values, output_values_ref, 0.0f);
- compare(*output_empty_rows_indicator, output_empty_rows_indicator_ref, 0.0f);
- }
- catch (const InferenceEngine::Exception &e) {
- FAIL() << e.what();
- }
- }
-};
-
-TEST_P(MKLDNNCPUExtSparseFillEmptyRowsTests, TestsSparseFillEmptyRows) {}
-
-
-// case 1 - empty sparse tensor with marker
-InferenceEngine::SizeVector input_indices_shape_case1 = {2, 2};
-std::vector<float> input_indices_value_case1 = {-1.f, -1.f};
-InferenceEngine::SizeVector input_values_shape_case1 = {2};
-InferenceEngine::SizeVector input_dense_shape_shape_case1 = {2};
-std::vector<float> input_dense_shape_value_case1 = {3.f, 4.f};
-InferenceEngine::SizeVector input_default_value_shape_case1 = {1};
-std::vector<float> input_default_value_case1 = {0.f};
-InferenceEngine::SizeVector output_indices_shape_case1 = {12, 2};
-InferenceEngine::SizeVector output_values_shape_case1 = {12};
-InferenceEngine::SizeVector output_empty_rows_indicator_shape_case1 = {3};
-
-// case 2 - in one row all values absent without marker
-InferenceEngine::SizeVector input_indices_shape_case2 = {6, 2};
-std::vector<float> input_indices_value_case2 = {1.f, 0.f, 0.f, 0.f, 3.f, 1.f, 1.f, 2.f, 3.f, 4.f, 0.f, 1.f};
-InferenceEngine::SizeVector input_values_shape_case2 = {6};
-InferenceEngine::SizeVector input_dense_shape_shape_case2 = {2};
-std::vector<float> input_dense_shape_value_case2 = {4.f, 5.f};
-InferenceEngine::SizeVector input_default_value_shape_case2 = {1};
-std::vector<float> input_default_value_case2 = {0.f};
-InferenceEngine::SizeVector output_indices_shape_case2 = {20, 2};
-InferenceEngine::SizeVector output_values_shape_case2 = {20};
-InferenceEngine::SizeVector output_empty_rows_indicator_shape_case2 = {4};
-
-// case 3 - in one row all values absent with marker
-InferenceEngine::SizeVector input_indices_shape_case3 = { 6, 2 };
-std::vector<float> input_indices_value_case3 = { 1.f, 0.f, 0.f, 0.f, 3.f, 1.f, 1.f, 2.f, 3.f, 4.f, -1.f, -1.f };
-InferenceEngine::SizeVector input_values_shape_case3 = { 6 };
-InferenceEngine::SizeVector input_dense_shape_shape_case3 = { 2 };
-std::vector<float> input_dense_shape_value_case3 = { 4.f, 5.f };
-InferenceEngine::SizeVector input_default_value_shape_case3 = { 1 };
-std::vector<float> input_default_value_case3 = { 0.f };
-InferenceEngine::SizeVector output_indices_shape_case3 = { 20, 2 };
-InferenceEngine::SizeVector output_values_shape_case3 = { 20 };
-InferenceEngine::SizeVector output_empty_rows_indicator_shape_case3 = { 4 };
-
-// case 4 - in all rows at least one value presents without marker
-InferenceEngine::SizeVector input_indices_shape_case4 = { 7, 2 };
-std::vector<float> input_indices_value_case4 = { 1.f, 0.f, 0.f, 0.f, 3.f, 1.f, 1.f, 2.f, 3.f, 3.f, 2.f, 1.f, 4.f, 3.f };
-InferenceEngine::SizeVector input_values_shape_case4 = { 7 };
-InferenceEngine::SizeVector input_dense_shape_shape_case4 = { 2 };
-std::vector<float> input_dense_shape_value_case4 = { 5.f, 4.f };
-InferenceEngine::SizeVector input_default_value_shape_case4 = { 1 };
-std::vector<float> input_default_value_case4 = { 0.f };
-InferenceEngine::SizeVector output_indices_shape_case4 = { 20, 2 };
-InferenceEngine::SizeVector output_values_shape_case4 = { 20 };
-InferenceEngine::SizeVector output_empty_rows_indicator_shape_case4 = { 5 };
-
-// case 5 - in all rows at least one value presents with marker
-InferenceEngine::SizeVector input_indices_shape_case5 = { 8, 2 };
-std::vector<float> input_indices_value_case5 = { 1.f, 0.f, 0.f, 0.f, 3.f, 1.f, 1.f, 2.f, 3.f, 3.f, 2.f, 1.f, 4.f, 3.f, -1.f, -1.f };
-InferenceEngine::SizeVector input_values_shape_case5 = { 8 };
-InferenceEngine::SizeVector input_dense_shape_shape_case5 = { 2 };
-std::vector<float> input_dense_shape_value_case5 = { 5.f, 4.f };
-InferenceEngine::SizeVector input_default_value_shape_case5 = { 1 };
-std::vector<float> input_default_value_case5 = { 0.f };
-InferenceEngine::SizeVector output_indices_shape_case5 = { 20, 2 };
-InferenceEngine::SizeVector output_values_shape_case5 = { 20 };
-InferenceEngine::SizeVector output_empty_rows_indicator_shape_case5 = { 5 };
-
-// case 6 - big sparse tensor with many missed rows without marker
-InferenceEngine::SizeVector input_indices_shape_case6 = { 7, 2 };
-std::vector<float> input_indices_value_case6 = { 1.f, 0.f, 0.f, 0.f, 99.f, 19.f, 12.f, 2.f, 37.f, 13.f, 2.f, 1.f, 45.f, 3.f };
-InferenceEngine::SizeVector input_values_shape_case6 = { 7 };
-InferenceEngine::SizeVector input_dense_shape_shape_case6 = { 2 };
-std::vector<float> input_dense_shape_value_case6 = { 100.f, 20.f };
-InferenceEngine::SizeVector input_default_value_shape_case6 = { 1 };
-std::vector<float> input_default_value_case6 = { 0.f };
-InferenceEngine::SizeVector output_indices_shape_case6 = { 2000, 2 };
-InferenceEngine::SizeVector output_values_shape_case6 = { 2000 };
-InferenceEngine::SizeVector output_empty_rows_indicator_shape_case6 = { 100 };
-
-// case 7 - big sparse tensor with many missed rows with marker
-InferenceEngine::SizeVector input_indices_shape_case7 = { 8, 2 };
-std::vector<float> input_indices_value_case7 = { 1.f, 0.f, 0.f, 0.f, 99.f, 19.f, 12.f, 2.f, 37.f, 13.f, 2.f, 1.f, 45.f, 3.f, -1.f, -1.f };
-InferenceEngine::SizeVector input_values_shape_case7 = { 8 };
-InferenceEngine::SizeVector input_dense_shape_shape_case7 = { 2 };
-std::vector<float> input_dense_shape_value_case7 = { 100.f, 20.f };
-InferenceEngine::SizeVector input_default_value_shape_case7 = { 1 };
-std::vector<float> input_default_value_case7 = { 0.f };
-InferenceEngine::SizeVector output_indices_shape_case7 = { 2000, 2 };
-InferenceEngine::SizeVector output_values_shape_case7 = { 2000 };
-InferenceEngine::SizeVector output_empty_rows_indicator_shape_case7 = { 100 };
-
-INSTANTIATE_TEST_CASE_P(
- TestsSparseFillEmptyRows, MKLDNNCPUExtSparseFillEmptyRowsTests,
- ::testing::Values(
- // case 1 - empty sparse tensor without marker
- sparse_fill_empty_rows_test_params{ "FP32",
- input_indices_shape_case1, input_indices_value_case1, input_values_shape_case1,
- input_dense_shape_shape_case1, input_dense_shape_value_case1, input_default_value_shape_case1, input_default_value_case1,
- output_indices_shape_case1, output_values_shape_case1, output_empty_rows_indicator_shape_case1,
- 1, MKLDNNPlugin::impl_desc_type::unknown },
-
- // case 2 - in one row all values absent without marker
- sparse_fill_empty_rows_test_params{ "FP32",
- input_indices_shape_case2, input_indices_value_case2, input_values_shape_case2,
- input_dense_shape_shape_case2, input_dense_shape_value_case2, input_default_value_shape_case2, input_default_value_case2,
- output_indices_shape_case2, output_values_shape_case2, output_empty_rows_indicator_shape_case2,
- 1, MKLDNNPlugin::impl_desc_type::unknown },
-
- // case 3 - in one row all values absent with marker
- sparse_fill_empty_rows_test_params{ "FP32",
- input_indices_shape_case3, input_indices_value_case3, input_values_shape_case3,
- 
input_dense_shape_shape_case3, input_dense_shape_value_case3, input_default_value_shape_case3, input_default_value_case3, - output_indices_shape_case3, output_values_shape_case3, output_empty_rows_indicator_shape_case3, - 1, MKLDNNPlugin::impl_desc_type::unknown }, - - // case 4 - in all rows at least one value presents without marker - sparse_fill_empty_rows_test_params{ "FP32", - input_indices_shape_case4, input_indices_value_case4, input_values_shape_case4, - input_dense_shape_shape_case4, input_dense_shape_value_case4, input_default_value_shape_case4, input_default_value_case4, - output_indices_shape_case4, output_values_shape_case4, output_empty_rows_indicator_shape_case4, - 1, MKLDNNPlugin::impl_desc_type::unknown }, - - // case 5 - in all rows at least one value presents with marker - sparse_fill_empty_rows_test_params{ "FP32", - input_indices_shape_case5, input_indices_value_case5, input_values_shape_case5, - input_dense_shape_shape_case5, input_dense_shape_value_case5, input_default_value_shape_case5, input_default_value_case5, - output_indices_shape_case5, output_values_shape_case5, output_empty_rows_indicator_shape_case5, - 1, MKLDNNPlugin::impl_desc_type::unknown }, - - // case 6 - big sparse tensor with many missed rows without marker - sparse_fill_empty_rows_test_params{ "FP32", - input_indices_shape_case6, input_indices_value_case6, input_values_shape_case6, - input_dense_shape_shape_case6, input_dense_shape_value_case6, input_default_value_shape_case6, input_default_value_case6, - output_indices_shape_case6, output_values_shape_case6, output_empty_rows_indicator_shape_case6, - 1, MKLDNNPlugin::impl_desc_type::unknown }, - - // case 7 - big sparse tensor with many missed rows with marker - sparse_fill_empty_rows_test_params{ "FP32", - input_indices_shape_case7, input_indices_value_case7, input_values_shape_case7, - input_dense_shape_shape_case7, input_dense_shape_value_case7, input_default_value_shape_case7, input_default_value_case7, - output_indices_shape_case7, output_values_shape_case7, output_empty_rows_indicator_shape_case7, - 1, MKLDNNPlugin::impl_desc_type::unknown } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_segment_reduce_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_segment_reduce_tests.cpp deleted file mode 100644 index 7bde79bf029..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_segment_reduce_tests.cpp +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct sparse_segment_reduce_test_params { - std::string model; - - std::string precision; - - std::string reduce_op; - - InferenceEngine::SizeVector input_data_shape; - std::vector input_data_value; - InferenceEngine::SizeVector input_indices_shape; - std::vector input_indices_value; - InferenceEngine::SizeVector input_segment_ids_shape; - std::vector input_segment_ids_value; - - InferenceEngine::SizeVector output_shape; - - std::vector output_ref; - - size_t num_prim_desc; - int selectedType; - - std::vector> comp; -}; - -class MKLDNNCPUExtSparseSegmentReduceTests : public TestsCommon, public WithParamInterface { - std::string getModel(sparse_segment_reduce_test_params p) 
{ - std::string model = p.model; - - std::string input_data_shape; - std::string input_indices_shape; - std::string input_segment_ids_shape; - std::string output_shape; - - for (auto& shape : p.input_data_shape) { - input_data_shape += ""; - input_data_shape += std::to_string(shape) + "\n"; - } - for (auto& shape : p.input_indices_shape) { - input_indices_shape += ""; - input_indices_shape += std::to_string(shape) + "\n"; - } - for (auto& shape : p.input_segment_ids_shape) { - input_segment_ids_shape += ""; - input_segment_ids_shape += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.output_shape) { - output_shape += ""; - output_shape += std::to_string(shape) + "\n"; - } - - REPLACE_WITH_STR(model, "_REDUCE_OP_", p.reduce_op); - REPLACE_WITH_STR(model, "_INPUT_DATA_", input_data_shape); - REPLACE_WITH_STR(model, "_INPUT_INDICES_", input_indices_shape); - REPLACE_WITH_STR(model, "_INPUT_SEGMENT_IDS_", input_segment_ids_shape); - REPLACE_WITH_STR(model, "_OUTPUT_", output_shape); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - sparse_segment_reduce_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "SparseSegmentReduce") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - // prepare input blobs and input blob map - InferenceEngine::BlobMap input_blob_map; - InferenceEngine::Blob::Ptr input_data = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_data_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_data_shape) }); - input_data->allocate(); - auto *input_data_ptr = dynamic_cast*>(input_data.get()); - std::copy(p.input_data_value.begin(), p.input_data_value.end(), (float *)input_data_ptr->data()); - input_blob_map["InputData"] = input_data; - InferenceEngine::Blob::Ptr input_indices = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_indices_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_indices_shape) }); - input_indices->allocate(); - auto *input_indices_ptr = dynamic_cast*>(input_indices.get()); - std::copy(p.input_indices_value.begin(), p.input_indices_value.end(), (float *)input_indices_ptr->data()); - input_blob_map["InputIndices"] = input_indices; - - InferenceEngine::Blob::Ptr input_segment_ids = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_segment_ids_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_segment_ids_shape) }); - input_segment_ids->allocate(); - auto *input_segment_ids_ptr = dynamic_cast*>(input_segment_ids.get()); - std::copy(p.input_segment_ids_value.begin(), p.input_segment_ids_value.end(), (float *)input_segment_ids_ptr->data()); - input_blob_map["InputSegmentIds"] = input_segment_ids; - - // prepare output blob map - 
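// --- Editor's illustrative sketch (not part of the removed test file) ------
// A minimal, self-contained model of the SparseSegmentReduce semantics the
// deleted test around this point exercises: rows of `data` selected by
// `indices` are accumulated per segment id and then reduced with "sum",
// "mean" or "sqrtn"; segments that receive no entries stay all-zero.
// Function and parameter names below are invented for illustration only.
#include <cmath>
#include <string>
#include <vector>

static std::vector<float> sparse_segment_reduce_sketch(const std::vector<float>& data,
                                                       size_t row_size,
                                                       const std::vector<size_t>& indices,
                                                       const std::vector<size_t>& segment_ids,
                                                       size_t num_segments,
                                                       const std::string& mode) {
    std::vector<float> out(num_segments * row_size, 0.0f);
    std::vector<size_t> counts(num_segments, 0);
    for (size_t i = 0; i < indices.size(); ++i) {
        const float* src_row = &data[indices[i] * row_size];
        float* dst_row = &out[segment_ids[i] * row_size];
        for (size_t c = 0; c < row_size; ++c)
            dst_row[c] += src_row[c];
        counts[segment_ids[i]]++;
    }
    if (mode == "sum")
        return out;
    for (size_t s = 0; s < num_segments; ++s) {
        if (counts[s] == 0)
            continue;  // empty segments remain all-zero
        const float denom = (mode == "mean") ? static_cast<float>(counts[s])
                                             : std::sqrt(static_cast<float>(counts[s]));
        for (size_t c = 0; c < row_size; ++c)
            out[s * row_size + c] /= denom;
    }
    return out;
}
// With the "case 0" data defined further below (indices {3,1,1,0,2},
// segment ids {0,0,2,2,4}, 5 segments, mode "sum") this reproduces the
// reference output {12,14,16, 0,0,0, 3,5,7, 0,0,0, 6,7,8}.
// ----------------------------------------------------------------------------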
InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - InferenceEngine::BlobMap output_blob_map; - for (auto iter = out.begin(); iter != out.end(); iter++) { - std::pair item = *iter; - InferenceEngine::Blob::Ptr output_blob_ptr = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output_blob_ptr->allocate(); - output_blob_map[item.first] = output_blob_ptr; - } - - // prepare blob with output reference data - InferenceEngine::Blob::Ptr output_ref = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.output_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_shape) }); - output_ref->allocate(); - auto *output_ref_ptr = dynamic_cast*>(output_ref.get()); - std::copy(p.output_ref.begin(), p.output_ref.end(), (float *)output_ref_ptr->data()); - - // infer - graph.Infer(input_blob_map, output_blob_map); - - // check the result - auto iter = out.begin(); - compare(*output_blob_map[iter->first], *output_ref, 0.0f); - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtSparseSegmentReduceTests, TestsSparseSegmentReduce) {} - -// model that contains one SparseSegmentReduce layer -std::string model = R"V0G0N( - - - - - - _INPUT_DATA_ - - - - - - - _INPUT_INDICES_ - - - - - - - _INPUT_SEGMENT_IDS_ - - - - - - - _INPUT_DATA_ - - - _INPUT_INDICES_ - - - _INPUT_SEGMENT_IDS_ - - - - - _OUTPUT_ - - - - - - - - - - -)V0G0N"; - -// case 0 - reduce = "sum", 5 segments, where two segments are empty -std::string reduce_op_case0 = "SparseSegmentSum"; -InferenceEngine::SizeVector input_data_shape_case0 = { 4, 3 }; -std::vector input_data_value_case0 = { 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f }; -InferenceEngine::SizeVector input_indices_shape_case0 = { 5 }; -std::vector input_indices_value_case0 = { 3.f, 1.f, 1.f, 0.f, 2.f}; -InferenceEngine::SizeVector input_segment_ids_shape_case0 = { 5 }; -std::vector input_segment_ids_value_case0 = { 0.f, 0.f, 2.f, 2.f, 4.f }; -InferenceEngine::SizeVector output_shape_case0 = { 5, 3 }; -std::vector output_value_ref_case0 = { 12.f, 14.f, 16.f, - 0.f, 0.f, 0.f, - 3.f, 5.f, 7.f, - 0.f, 0.f, 0.f, - 6.f, 7.f, 8.f }; - -// case 1 - reduce = "mean", 5 segments, where two segments are empty -std::string _reduce_op_case1 = "SparseSegmentMean"; -InferenceEngine::SizeVector _input_data_shape_case1 = { 4, 3 }; -std::vector _input_data_value_case1 = { 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f }; -InferenceEngine::SizeVector _input_indices_shape_case1 = { 5 }; -std::vector _input_indices_value_case1 = { 3.f, 1.f, 1.f, 0.f, 2.f }; -InferenceEngine::SizeVector _input_segment_ids_shape_case1 = { 5 }; -std::vector _input_segment_ids_value_case1 = { 0.f, 0.f, 2.f, 2.f, 4.f }; -InferenceEngine::SizeVector _output_shape_case1 = { 5, 3 }; -std::vector _output_value_ref_case1 = { 6.f, 7.f, 8.f, - 0.f, 0.f, 0.f, - 1.5f, 2.5f, 3.5f, - 0.f, 0.f, 0.f, - 6.f, 7.f, 8.f }; - -// case 2 - reduce = "sqrtn", 5 segments, where two segments are empty -std::string _reduce_op_case2 = "SparseSegmentSqrtN"; -InferenceEngine::SizeVector _input_data_shape_case2 = { 4, 3 }; -std::vector _input_data_value_case2 = { 0.f, 1.f, 2.f, - 3.f, 4.f, 5.f, - 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f }; -InferenceEngine::SizeVector _input_indices_shape_case2 = { 6 }; -std::vector _input_indices_value_case2 = { 0.f, 1.f, 2.f, 3.f, 1.f, 0.f}; -InferenceEngine::SizeVector _input_segment_ids_shape_case2 = { 6 }; -std::vector _input_segment_ids_value_case2 = { 0.f, 0.f, 0.f, 0.f, 
2.f, 4.f }; -InferenceEngine::SizeVector _output_shape_case2 = { 6, 3 }; -std::vector _output_value_ref_case2 = { 9.f, 11.f, 13.f, - 0.f, 0.f, 0.f, - 3.f, 4.f, 5.f, - 0.f, 0.f, 0.f, - 0.f, 1.f, 2.f, - 0.f, 0.f, 0.f}; - -INSTANTIATE_TEST_CASE_P( - TestsSparseSegmentReduce, MKLDNNCPUExtSparseSegmentReduceTests, - ::testing::Values( - // case 0 - reduce with sum operation, 5 segments, where two segments are empty - sparse_segment_reduce_test_params{ - model, "FP32", reduce_op_case0, - input_data_shape_case0, input_data_value_case0, - input_indices_shape_case0, input_indices_value_case0, - input_segment_ids_shape_case0, input_segment_ids_value_case0, - output_shape_case0, output_value_ref_case0, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 1 - reduce with mean operation, 5 segments, where two segments are empty - sparse_segment_reduce_test_params{ - model, "FP32", _reduce_op_case1, - _input_data_shape_case1, _input_data_value_case1, - _input_indices_shape_case1, _input_indices_value_case1, - _input_segment_ids_shape_case1, _input_segment_ids_value_case1, - _output_shape_case1, _output_value_ref_case1, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 2 - reduce with sqrtn operation, 5 segments, where two segments are empty - sparse_segment_reduce_test_params{ - model, "FP32", _reduce_op_case2, - _input_data_shape_case2, _input_data_value_case2, - _input_indices_shape_case2, _input_indices_value_case2, - _input_segment_ids_shape_case2, _input_segment_ids_value_case2, - _output_shape_case2, _output_value_ref_case2, - 1, MKLDNNPlugin::impl_desc_type::unknown - } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_to_dense_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_to_dense_tests.cpp deleted file mode 100644 index be143c740e5..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_to_dense_tests.cpp +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct sparse_to_dense_test_params { - std::string model; - std::string precision; - - InferenceEngine::SizeVector input_indices_shape; - std::vector input_indices; - InferenceEngine::SizeVector input_dense_shape_shape; - std::vector input_dense_shape; - InferenceEngine::SizeVector input_values_shape; - std::vector input_values; - int input_default_value; - - InferenceEngine::SizeVector output_shape; - std::vector output_value_ref; - - size_t num_prim_desc; - int selectedType; - - std::vector> comp; -}; - -class MKLDNNCPUExtSparseToDenseTests : public TestsCommon, public WithParamInterface { - std::string getModel(sparse_to_dense_test_params p) { - std::string model = p.model; - - std::string input_indices_shape; - std::string input_dense_shape_shape; - std::string input_values_shape; - std::string output_shape; - - for (auto& shape : p.input_indices_shape) { - input_indices_shape += ""; - input_indices_shape += std::to_string(shape) + "\n"; - } - for (auto& shape : p.input_dense_shape_shape) { - input_dense_shape_shape += ""; - input_dense_shape_shape += std::to_string(shape) + "\n"; - } - for (auto& shape : p.input_values_shape) { - input_values_shape += ""; - input_values_shape += std::to_string(shape) 
+ "\n"; - } - for (auto& shape : p.output_shape) { - output_shape += ""; - output_shape += std::to_string(shape) + "\n"; - } - - REPLACE_WITH_STR(model, "_INPUT_INDICES_SHAPE_", input_indices_shape); - REPLACE_WITH_STR(model, "_INPUT_DENSE_SHAPE_SHAPE_", input_dense_shape_shape); - REPLACE_WITH_STR(model, "_INPUT_VALUES_SHAPE_", input_values_shape); - REPLACE_WITH_STR(model, "_OUTPUT_SHAPE_", output_shape); - - return model; - } - -protected: - static void compare_int( - InferenceEngine::Blob &res, - InferenceEngine::Blob &ref, - int max_diff = 0, - const std::string assertDetails = "") { - int *res_ptr = res.buffer().as(); - size_t res_size = res.size(); - - int *ref_ptr = ref.buffer().as(); - size_t ref_size = ref.size(); - - ASSERT_EQ(res_size, ref_size) << assertDetails; - - for (size_t i = 0; i < ref_size; i++) { - ASSERT_EQ(res_ptr[i], ref_ptr[i]) << assertDetails; - } - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - sparse_to_dense_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "SparseToDense") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - // prepare input blob and input blob map - InferenceEngine::BlobMap input_blob_map; - InferenceEngine::Blob::Ptr input_indices = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - p.input_indices_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_indices_shape) }); - input_indices->allocate(); - auto *input_indices_ptr = dynamic_cast*>(input_indices.get()); - std::copy(p.input_indices.begin(), p.input_indices.end(), (int *)input_indices_ptr->data()); - input_blob_map["InputIndices"] = input_indices; - - InferenceEngine::Blob::Ptr input_dense_shape = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - p.input_dense_shape_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_dense_shape_shape) }); - input_dense_shape->allocate(); - auto *input_dense_shape_ptr = dynamic_cast*>(input_dense_shape.get()); - std::copy(p.input_dense_shape.begin(), p.input_dense_shape.end(), (int *)input_dense_shape_ptr->data()); - input_blob_map["InputDenseShape"] = input_dense_shape; - - InferenceEngine::Blob::Ptr input_values = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - p.input_values_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_values_shape) }); - input_values->allocate(); - auto *input_values_ptr = dynamic_cast*>(input_values.get()); - std::copy(p.input_values.begin(), p.input_values.end(), (int *)input_values_ptr->data()); - input_blob_map["InputValues"] = input_values; - - InferenceEngine::Blob::Ptr input_default_value = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - { }, InferenceEngine::TensorDesc::getLayoutByDims({ }) }); - input_default_value->allocate(); - auto 
*input_default_value_ptr = dynamic_cast*>(input_default_value.get()); - *((int *)input_default_value_ptr->data()) = p.input_default_value; - input_blob_map["InputDefaultValue"] = input_default_value; - - // prepare output blob map - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - InferenceEngine::BlobMap output_blob_map; - for (auto iter = out.begin(); iter != out.end(); iter++) { - std::pair item = *iter; - InferenceEngine::Blob::Ptr output_blob_ptr = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output_blob_ptr->allocate(); - output_blob_map[item.first] = output_blob_ptr; - } - - // prepare blobs with reference data - InferenceEngine::Blob::Ptr output_blob_ref = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - p.output_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_shape) }); - output_blob_ref->allocate(); - auto *output_blob_ref_ptr = dynamic_cast*>(output_blob_ref.get()); - std::copy(p.output_value_ref.begin(), p.output_value_ref.end(), (int *)output_blob_ref_ptr->data()); - - // infer - graph.Infer(input_blob_map, output_blob_map); - - // check the result - auto iter = out.begin(); - compare_int(*output_blob_map[iter->first], *output_blob_ref, 0); - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtSparseToDenseTests, TestsSparseToDense) {} - -// model 1 that contains one SparseToDense layer -std::string sp2d_model1 = R"V0G0N( - - - - - - _INPUT_INDICES_SHAPE_ - - - - - - - _INPUT_DENSE_SHAPE_SHAPE_ - - - - - - - _INPUT_VALUES_SHAPE_ - - - - - - - - - - - - _INPUT_INDICES_SHAPE_ - - - _INPUT_DENSE_SHAPE_SHAPE_ - - - _INPUT_VALUES_SHAPE_ - - - - - - _OUTPUT_SHAPE_ - - - - - - - - - - - -)V0G0N"; - -// case 1 - it contains of the default value input -InferenceEngine::SizeVector sp2d_input_indices_shape_case1 = { 5, 2 }; -std::vector sp2d_input_indices_case1 = { 0, 1, - 1, 2, - 1, 3, - 3, 0, - 3, 4 }; -InferenceEngine::SizeVector sp2d_input_dense_shape_shape_case1 = { 2 }; -std::vector sp2d_input_dense_shape_case1 = { 4, 5}; -InferenceEngine::SizeVector sp2d_input_values_shape_case1 = { 5 }; -std::vector sp2d_input_values_case1 = { 8, - 1, - 2, - 1, - 8 }; -int sp2d_input_default_value_case1 = -1; -InferenceEngine::SizeVector sp2d_output_shape_case1 = { 4, 5}; -std::vector sp2d_output_value_ref_case1 = { -1, 8, -1, -1, -1, - -1, -1, 1, 2, -1, - -1, -1, -1, -1, -1, - 1, -1, -1, -1, 8}; - -INSTANTIATE_TEST_CASE_P( - TestsSparseToDense, MKLDNNCPUExtSparseToDenseTests, - ::testing::Values( - sparse_to_dense_test_params{ - sp2d_model1, "I32", - sp2d_input_indices_shape_case1, sp2d_input_indices_case1, - sp2d_input_dense_shape_shape_case1, sp2d_input_dense_shape_case1, - sp2d_input_values_shape_case1, sp2d_input_values_case1, - sp2d_input_default_value_case1, - sp2d_output_shape_case1, sp2d_output_value_ref_case1, - 1, MKLDNNPlugin::impl_desc_type::unknown - } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_weighted_reduce_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_weighted_reduce_tests.cpp deleted file mode 100644 index 0917fd9c0d0..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_weighted_reduce_tests.cpp +++ /dev/null @@ -1,416 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include 
"tests_common.hpp" -#include - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct sparse_weighted_reduce_test_params { - std::string model; - std::string precision; - std::string reduce_operation; - bool with_weights; - - InferenceEngine::SizeVector input_indices_shape; - std::vector input_indices; - InferenceEngine::SizeVector input_values_shape; - std::vector input_values; - InferenceEngine::SizeVector input_dense_shape_shape; - std::vector input_dense_shape; - InferenceEngine::SizeVector input_params_table_shape; - std::vector input_params_table; - int input_default_value; - InferenceEngine::SizeVector input_weights_shape; - std::vector input_weights; - - InferenceEngine::SizeVector output_shape; - std::vector output_value_ref; - - size_t num_prim_desc; - int selectedType; - - std::vector> comp; -}; - -class MKLDNNCPUExtExperimentalSparseWeightedReduceTests : public TestsCommon, public WithParamInterface { - std::string getModel(sparse_weighted_reduce_test_params p) { - std::string model = p.model; - - std::string input_indices_shape; - std::string input_values_shape; - std::string input_dense_shape_shape; - std::string input_params_table_shape; - std::string input_weights_shape; - std::string output_shape; - - for (auto& shape : p.input_indices_shape) { - input_indices_shape += ""; - input_indices_shape += std::to_string(shape) + "\n"; - } - for (auto& shape : p.input_values_shape) { - input_values_shape += ""; - input_values_shape += std::to_string(shape) + "\n"; - } - for (auto& shape : p.input_dense_shape_shape) { - input_dense_shape_shape += ""; - input_dense_shape_shape += std::to_string(shape) + "\n"; - } - for (auto& shape : p.input_params_table_shape) { - input_params_table_shape += ""; - input_params_table_shape += std::to_string(shape) + "\n"; - } - if (p.with_weights) { - for (auto& shape : p.input_weights_shape) { - input_weights_shape += ""; - input_weights_shape += std::to_string(shape) + "\n"; - } - } - - for (auto& shape : p.output_shape) { - output_shape += ""; - output_shape += std::to_string(shape) + "\n"; - } - - REPLACE_WITH_STR(model, "_PRECISION_", p.precision); - REPLACE_WITH_STR(model, "_REDUCE_OPERATION_", p.reduce_operation); - - REPLACE_WITH_STR(model, "_INPUT_INDICES_SHAPE_", input_indices_shape); - REPLACE_WITH_STR(model, "_INPUT_VALUES_SHAPE_", input_values_shape); - REPLACE_WITH_STR(model, "_INPUT_DENSE_SHAPE_SHAPE_", input_dense_shape_shape); - REPLACE_WITH_STR(model, "_INPUT_PARAMS_TABLE_SHAPE_", input_params_table_shape); - if (p.with_weights) { - REPLACE_WITH_STR(model, "_INPUT_WEIGHTS_SHAPE_", input_weights_shape); - } - REPLACE_WITH_STR(model, "_OUTPUT_SHAPE_", output_shape); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - sparse_weighted_reduce_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "ExperimentalSparseWeightedReduce") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - 
p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - // prepare input blob and input blob map - InferenceEngine::BlobMap input_blob_map; - InferenceEngine::Blob::Ptr input_indices = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - p.input_indices_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_indices_shape) }); - input_indices->allocate(); - auto *input_indices_ptr = dynamic_cast*>(input_indices.get()); - std::vector input_indices_int(p.input_indices.begin(), p.input_indices.end()); - std::copy(input_indices_int.begin(), input_indices_int.end(), (int *)input_indices_ptr->data()); - input_blob_map["InputIndices"] = input_indices; - - InferenceEngine::Blob::Ptr input_values = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - p.input_values_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_values_shape) }); - input_values->allocate(); - auto *input_values_ptr = dynamic_cast*>(input_values.get()); - std::vector input_values_int(p.input_values.begin(), p.input_values.end()); - std::copy(input_values_int.begin(), input_values_int.end(), (int *)input_values_ptr->data()); - input_blob_map["InputValues"] = input_values; - - InferenceEngine::Blob::Ptr input_dense_shape = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - p.input_dense_shape_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_dense_shape_shape) }); - input_dense_shape->allocate(); - auto *input_dense_shape_ptr = dynamic_cast*>(input_dense_shape.get()); - std::vector input_dense_shape_int(p.input_dense_shape.begin(), p.input_dense_shape.end()); - std::copy(input_dense_shape_int.begin(), input_dense_shape_int.end(), (int *)input_dense_shape_ptr->data()); - input_blob_map["InputDenseShape"] = input_dense_shape; - - InferenceEngine::Blob::Ptr input_params_table = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_params_table_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_params_table_shape) }); - input_params_table->allocate(); - auto *input_params_table_ptr = dynamic_cast*>(input_params_table.get()); - std::copy(p.input_params_table.begin(), p.input_params_table.end(), (float *)input_params_table_ptr->data()); - input_blob_map["InputParamsTable"] = input_params_table; - - InferenceEngine::Blob::Ptr input_default_value = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, - { }, InferenceEngine::TensorDesc::getLayoutByDims({ }) }); - input_default_value->allocate(); - auto *input_default_value_ptr = dynamic_cast*>(input_default_value.get()); - *((int *)input_default_value_ptr->data()) = p.input_default_value; - input_blob_map["InputDefaultValue"] = input_default_value; - - if (p.with_weights) { - InferenceEngine::Blob::Ptr input_weights = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_weights_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_weights_shape) }); - input_weights->allocate(); - auto *input_weights_ptr = dynamic_cast*>(input_weights.get()); - std::copy(p.input_weights.begin(), p.input_weights.end(), (float *)input_weights_ptr->data()); - input_blob_map["InputWeights"] = input_weights; - } - - // prepare output blob map - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - InferenceEngine::BlobMap output_blob_map; - for 
(auto iter = out.begin(); iter != out.end(); iter++) { - std::pair item = *iter; - InferenceEngine::Blob::Ptr output_blob_ptr = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output_blob_ptr->allocate(); - output_blob_map[item.first] = output_blob_ptr; - } - - // prepare blobs with reference data - InferenceEngine::Blob::Ptr output_blob_ref = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.output_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_shape) }); - output_blob_ref->allocate(); - auto *output_blob_ref_ptr = dynamic_cast*>(output_blob_ref.get()); - std::copy(p.output_value_ref.begin(), p.output_value_ref.end(), (float *)output_blob_ref_ptr->data()); - - // infer - graph.Infer(input_blob_map, output_blob_map); - - // check the result - auto iter = out.begin(); - compare(*output_blob_map[iter->first], *output_blob_ref, 0.0f); - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtExperimentalSparseWeightedReduceTests, TestsExperimentalSparseWeightedReduce) {} - -// model 1 that contains one ExperimentalSparseWeightedReduce layer with the weights input -std::string swr_model1 = R"V0G0N( - - - - - - _INPUT_INDICES_SHAPE_ - - - - - - - _INPUT_VALUES_SHAPE_ - - - - - - - _INPUT_DENSE_SHAPE_SHAPE_ - - - - - - - _INPUT_PARAMS_TABLE_SHAPE_ - - - - - - - - - - - - _INPUT_WEIGHTS_SHAPE_ - - - - - - - _INPUT_INDICES_SHAPE_ - - - _INPUT_VALUES_SHAPE_ - - - _INPUT_DENSE_SHAPE_SHAPE_ - - - _INPUT_PARAMS_TABLE_SHAPE_ - - - - _INPUT_WEIGHTS_SHAPE_ - - - - - _OUTPUT_SHAPE_ - - - - - - - - - - - - - -)V0G0N"; - -// model 2 that contains one ExperimentalSparseWeightedReduce layer without the weights input -std::string swr_model2 = R"V0G0N( - - - - - - _INPUT_INDICES_SHAPE_ - - - - - - - _INPUT_VALUES_SHAPE_ - - - - - - - _INPUT_DENSE_SHAPE_SHAPE_ - - - - - - - _INPUT_PARAMS_TABLE_SHAPE_ - - - - - - - - - - - - _INPUT_INDICES_SHAPE_ - - - _INPUT_VALUES_SHAPE_ - - - _INPUT_DENSE_SHAPE_SHAPE_ - - - _INPUT_PARAMS_TABLE_SHAPE_ - - - - - - _OUTPUT_SHAPE_ - - - - - - - - - - - - -)V0G0N"; - -// case 1 - ExperimentalSparseWeightedSum, I32, the model with weights input -std::string swr_precision_case2 = "I32"; -std::string swr_reduce_operation_case2 = "ExperimentalSparseWeightedSum"; -bool swr_with_weights_case2 = true; -InferenceEngine::SizeVector swr_input_indices_shape_case2 = { 5, 2 }; -std::vector swr_input_indices_case2 = { 0.0f, 1.0f, - 1.0f, 2.0f, - 1.0f, 3.0f, - 3.0f, 0.0f, - 3.0f, 4.0f }; -InferenceEngine::SizeVector swr_input_values_shape_case2 = { 5 }; -std::vector swr_input_values_case2 = { 3.0f, - 1.0f, - 2.0f, - 1.0f, - 4.0f }; -InferenceEngine::SizeVector swr_input_dense_shape_shape_case2 = { 2 }; -std::vector swr_input_dense_shape_case2 = { 4.0f, 5.0f }; -InferenceEngine::SizeVector swr_input_params_table_shape_case2 = { 5, 3 }; -std::vector swr_input_params_table_case2 = { 1.0f, 2.0f, 3.0f, - 4.0f, 5.0f, 6.0f, - 6.0f, 5.0f, 4.0f, - 3.0f, 2.0f, 1.0f, - 10.0f, 11.0f, 12.0f }; -int swr_input_default_value_case2 = 0; -InferenceEngine::SizeVector swr_input_weights_shape_case2 = { 5 }; -std::vector swr_input_weights_case2 = { 1.0f, - 2.0f, - 0.5f, - 1.0f, - 3.0f }; -InferenceEngine::SizeVector swr_output_shape_case2 = { 4, 3 }; -std::vector swr_output_value_ref_case2 = { 3.0f, 2.0f, 1.0f, - 11.0f, 12.5f, 14.0f, - 1.0f, 2.0f, 3.0f, - 34.0f, 38.0f, 42.0f }; - - -INSTANTIATE_TEST_CASE_P( - TestsExperimentalSparseWeightedReduce, MKLDNNCPUExtExperimentalSparseWeightedReduceTests, - 
::testing::Values( - sparse_weighted_reduce_test_params{ - swr_model1, swr_precision_case2, swr_reduce_operation_case2, swr_with_weights_case2, - swr_input_indices_shape_case2, swr_input_indices_case2, - swr_input_values_shape_case2, swr_input_values_case2, - swr_input_dense_shape_shape_case2, swr_input_dense_shape_case2, - swr_input_params_table_shape_case2, swr_input_params_table_case2, - swr_input_default_value_case2, - swr_input_weights_shape_case2, swr_input_weights_case2, - swr_output_shape_case2, swr_output_value_ref_case2, - 1, MKLDNNPlugin::impl_desc_type::unknown - } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp deleted file mode 100644 index ca8f1fc24e4..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp +++ /dev/null @@ -1,487 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct strided_slice_test_params { - InferenceEngine::SizeVector in_shape; - size_t dim_size; - std::vector begin; - std::vector end; - std::vector stride; - - InferenceEngine::SizeVector begin_mask; - InferenceEngine::SizeVector end_mask; - InferenceEngine::SizeVector ellipsis_mask; - InferenceEngine::SizeVector new_axis_mask; - InferenceEngine::SizeVector shrink_axis_mask; - InferenceEngine::SizeVector out_shape; - std::vector reference; - - std::vector> comp; -}; - -inline void clipping(int *idx, const int min, const int max) { - (*idx) = ((*idx) > min) ? (*idx) : min; - (*idx) = ((*idx) < max) ? 
(*idx) : (max - 1); - return; -} - -void ref_strided_slice( - InferenceEngine::TBlob &src, - InferenceEngine::TBlob &dst, - InferenceEngine::SizeVector &out_dims, - std::vector begin, - std::vector end, - std::vector stride, - InferenceEngine::SizeVector begin_mask, - InferenceEngine::SizeVector end_mask, - InferenceEngine::SizeVector ellipsis_mask, - InferenceEngine::SizeVector new_axis_mask, - InferenceEngine::SizeVector shrink_axis_mask -) { - size_t i; - const float *src_data = src.data(); - InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims(); - InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides(); - float* dst_data = dst.data(); - InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims(); - InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides(); - - int new_axis = 0; - for (auto& na : new_axis_mask) - new_axis += na; - - int shrink_axis = 0; - for (auto& sa : shrink_axis_mask) - shrink_axis += sa; - int max_dims = src_dims.size() + new_axis; -// if ((max_dims - shrink_axis) != dst_dims.size()) -// FAIL() << "Destination dims should be equal source dims + new axis - shrink_axis"; - - // Check beging/end/stride vector sizes - int bounds_size = 0; - if (begin.size() && end.size() && begin.size() != end.size()) FAIL() << "Begin vector size should be equal end vectror size"; - if (begin.size() && stride.size() && stride.size() != begin.size()) FAIL() << "Stride vector size should be equal begin vectror size"; - if (end.size() && stride.size() && stride.size() != end.size()) FAIL() << "Stride vector size should be equal end vectror size"; - - if (begin.size()) bounds_size = begin.size(); - if (end.size()) bounds_size = end.size(); - if (stride.size()) bounds_size = stride.size(); - - // ellipsis_mask must be a power of two (only one ellipsis), so to take a first position - int ellipsis_pos1, ellipsis_pos2; - ellipsis_pos1 = ellipsis_pos2 = max_dims; - for (i = 0; i < ellipsis_mask.size(); i++) { - if (ellipsis_mask[i] > 0) { - ellipsis_pos1 = i; - break; - } - } - bounds_size -= ellipsis_pos1; - if(bounds_size > 0 && (max_dims - bounds_size) > ellipsis_pos1) - ellipsis_pos2 = max_dims - bounds_size; - - std::vector begin_dms(max_dims, 0); - std::vector end_dms(max_dims, -1); - std::vector stride_dms(max_dims, 1); - - int j, k, bj, ej, sj; - InferenceEngine::SizeVector our_dims; - for (i = 0, j = 0, k = 0, bj = 0, ej = 0, sj = 0; i < max_dims; i++) { - if (i >= ellipsis_pos1 && i < ellipsis_pos2) { - if (!(new_axis_mask.size() > i && new_axis_mask[i] == 1)) { - end_dms[i] = end_dms[i] >= 0 ? end_dms[i] : src_dims[j++] + end_dms[i]; - } else { - //end_dms[i] = 0; - end_dms[i] = begin_dms[i]; - } - out_dims.push_back(static_cast(ceil(static_cast(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast(abs(stride_dms[i]))))); - our_dims.push_back(static_cast(ceil(static_cast(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast(abs(stride_dms[i]))))); - k = ellipsis_pos1; - continue; - } - stride_dms[i] = (stride.size() > sj && stride[sj] != 0) ? stride[sj++] : 1; - - if (!(begin_mask.size() > j && begin_mask[j] == 0)) - begin_dms[i] = begin.size() > bj ? begin[bj] : (stride_dms[i] > 0 ? 0 : -1); - else - begin_dms[i] = stride_dms[i] > 0 ? 0 : -1; - bj++; - begin_dms[i] = begin_dms[i] >= 0 ? begin_dms[i] : src_dims[j] + begin_dms[i]; - // Clipping 'begin' - clipping(&begin_dms[i], 0, src_dims[j]); - - if (!(end_mask.size() > j && end_mask[j] == 0)) { - int end_dms_tmp = end.size() > ej ? 
(stride_dms[i] > 0 ? end[ej] - 1 : end[ej] + 1) : end_dms[i]; - end_dms[i] = end.size() > ej ? end_dms_tmp : (stride_dms[i] > 0 ? -1 : 0); - } - else { - end_dms[i] = stride_dms[i] > 0 ? -1 : 0; - } - ej++; - end_dms[i] = end_dms[i] >= 0 ? end_dms[i] : src_dims[j] + end_dms[i]; - // Clipping 'end' - clipping(&end_dms[i], 0, src_dims[j]); - - if (!(new_axis_mask.size() > i && new_axis_mask[i] == 1)) - j++; - else - end_dms[i] = 0; - - if (shrink_axis_mask.size() > k && shrink_axis_mask[k] == 1) - end_dms[i] = begin_dms[i]; - else - out_dims.push_back(static_cast(ceil(static_cast(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast(abs(stride_dms[i]))))); - - our_dims.push_back(static_cast(ceil(static_cast(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast(abs(stride_dms[i]))))); - k++; - } - - size_t work_amount_dst = dstStrides[0] * dst_dims[0]; - InferenceEngine::SizeVector counters(max_dims, 0); - - for (size_t iwork = 0, dst_idx = 0; iwork < work_amount_dst; ++iwork) { - int src_idx = 0; - for (i = 0, j = 0; i < max_dims; ++i) { - src_idx += (begin_dms[i] + counters[i] * stride_dms[i]) * srcStrides[j]; - if (!(new_axis_mask.size() > i && new_axis_mask[i] == 1)) j++; - } - - dst_data[dst_idx++] = src_data[src_idx]; - - for (j = max_dims - 1; j >= 0; j--) { - counters[j] = (counters[j] + 1) % our_dims[j]; - if (counters[j] != 0) break; - } - } -} - -class MKLDNNCPUExtStridedSliceTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - - - - - - - _DIM_SIZE_ - - - - - - - _DIM_SIZE_ - - - - - - - _DIM_SIZE_ - - - - - - - - _IN_ - - - _DIM_SIZE_ - - - _DIM_SIZE_ - - - _DIM_SIZE_ - - - - - _OUT_ - - - - - - - - - - - -)V0G0N"; - - std::string getModel(strided_slice_test_params p) { - std::string model = model_t; - std::string in_shape; - std::string out_shape; - std::string begin; - std::string end; - std::string ellipsis; - std::string new_axis; - std::string shrink_axis; - - for (size_t i = 0; i < p.in_shape.size(); i++) { - in_shape += ""; - in_shape += std::to_string(p.in_shape[i]) + "\n"; - } - in_shape.pop_back(); - REPLACE_WITH_STR(model, "_IN_", in_shape); - REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.dim_size); - - if (p.begin_mask.size()) { - begin = "begin_mask=\""; - for (auto& pb : p.begin_mask) - begin += std::to_string(pb) + ","; - begin.pop_back(); - begin += "\""; - } - REPLACE_WITH_STR(model, "_BEGIN_", begin); - - if (p.end_mask.size()) { - end = "end_mask=\""; - for (auto& pb : p.end_mask) - end += std::to_string(pb) + ","; - end.pop_back(); - end += "\""; - } - REPLACE_WITH_STR(model, "_END_", end); - - if (p.ellipsis_mask.size()) { - ellipsis = "ellipsis_mask=\""; - for (auto& pb : p.ellipsis_mask) - ellipsis += std::to_string(pb) + ","; - ellipsis.pop_back(); - ellipsis += "\""; - } - REPLACE_WITH_STR(model, "_ELLIPSIS_", ellipsis); - - if (p.new_axis_mask.size()) { - new_axis = "new_axis_mask=\""; - for (auto& pb : p.new_axis_mask) - new_axis += std::to_string(pb) + ","; - new_axis.pop_back(); - new_axis += "\""; - } - REPLACE_WITH_STR(model, "_NEW_AXIS_", new_axis); - - if (p.shrink_axis_mask.size()) { - shrink_axis = "shrink_axis_mask=\""; - for (auto& pb : p.shrink_axis_mask) - shrink_axis += std::to_string(pb) + ","; - shrink_axis.pop_back(); - shrink_axis += "\""; - } - REPLACE_WITH_STR(model, "_SHRINK_", shrink_axis); - - for (size_t i = 0; i < p.out_shape.size(); i++) { - out_shape += ""; - out_shape += std::to_string(p.out_shape[i]) + "\n"; - } - out_shape.pop_back(); - REPLACE_WITH_STR(model, "_OUT_", out_shape); - - 
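// --- Editor's illustrative sketch (not part of the removed test file) ------
// Single-axis model of the bound normalization performed by
// ref_strided_slice() above: negative begin/end wrap around the axis size,
// values are clipped to the valid range, and the slice walks from begin
// towards end (exclusive) with the given stride.  The ellipsis, new-axis and
// shrink-axis masks handled by the full reference are deliberately left out;
// names below are invented for illustration only.
#include <algorithm>
#include <vector>

static std::vector<int> slice_axis_indices_sketch(int dim, int begin, int end, int stride,
                                                  bool use_begin, bool use_end) {
    // Defaults when the corresponding mask disables the bound.
    if (!use_begin) begin = stride > 0 ? 0 : dim - 1;
    if (!use_end)   end   = stride > 0 ? dim : -dim - 1;
    // Negative indices count from the end of the axis.
    if (begin < 0) begin += dim;
    if (end < 0)   end   += dim;
    // Clip into the valid range for the walk direction.
    begin = std::max(0, std::min(begin, stride > 0 ? dim : dim - 1));
    end   = std::max(stride > 0 ? 0 : -1, std::min(end, dim));
    std::vector<int> idx;
    for (int i = begin; stride > 0 ? i < end : i > end; i += stride)
        idx.push_back(i);
    return idx;
}
// Example matching one of the parameter sets below: dim = 10, begin = -5,
// end = 0, stride = -1 selects indices {5,4,3,2,1}, i.e. an output of
// length 5; disabling the end bound instead yields {5,4,3,2,1,0}.
// ----------------------------------------------------------------------------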
return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - strided_slice_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - ////std::cout << model; - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Output Reference - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - // Input Data - InferenceEngine::Blob::Ptr src; - src = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) }); - src->allocate(); - fill_data_dbgval(src->buffer(), src->size()); - auto * srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Input Begin - InferenceEngine::Blob::Ptr beginIdx; - InferenceEngine::SizeVector begin_dim(1, p.dim_size); - beginIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, begin_dim, InferenceEngine::TensorDesc::getLayoutByDims(begin_dim) }); - beginIdx->allocate(); - if (p.begin.size()) - memcpy(static_cast(beginIdx->buffer()), &p.begin[0], sizeof(int32_t)*p.begin.size()); - else - memset(static_cast(beginIdx->buffer()), 0, sizeof(int32_t)*p.begin.size()); - auto * beginIdxPtr = dynamic_cast*>(beginIdx.get()); - if (beginIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Input End - InferenceEngine::Blob::Ptr endIdx; - InferenceEngine::SizeVector end_dim(1, p.dim_size); - endIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, end_dim, InferenceEngine::TensorDesc::getLayoutByDims(end_dim) }); - endIdx->allocate(); - if (p.end.size()) - memcpy(static_cast(endIdx->buffer()), &p.end[0], sizeof(int32_t)*p.end.size()); - else - memset(static_cast(endIdx->buffer()), 0, sizeof(int32_t)*p.end.size()); - auto * endIdxPtr = dynamic_cast*>(endIdx.get()); - if (endIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Input Stride - InferenceEngine::Blob::Ptr stridesIdx; - InferenceEngine::SizeVector strides_dim(1, p.dim_size); - stridesIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, strides_dim, InferenceEngine::TensorDesc::getLayoutByDims(strides_dim) }); - stridesIdx->allocate(); - if (p.stride.size()) - memcpy(static_cast(stridesIdx->buffer()), &p.stride[0], sizeof(int32_t)*p.stride.size()); - else - memset(static_cast(stridesIdx->buffer()), 0, sizeof(int32_t)*p.stride.size()); - auto * stridesIdxPtr = dynamic_cast*>(stridesIdx.get()); - if (stridesIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - // Check results - InferenceEngine::SizeVector out_dims; - ref_strided_slice(*srcPtr, dst_ref, out_dims, p.begin, p.end, p.stride, p.begin_mask, p.end_mask, p.ellipsis_mask, p.new_axis_mask, p.shrink_axis_mask); - - // Check results - if(out_dims.size() != p.out_shape.size()) - FAIL() << "Wrong out_shape size!"; - for (size_t i = 0; i < p.out_shape.size(); i++) { - if (out_dims[i] != p.out_shape[i]) - 
FAIL() << "Wrong out_shape dimensions!"; - } - if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0) - FAIL() << "Wrong result with compare TF reference!"; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", src)); - srcs.insert(std::pair("begin", beginIdx)); - srcs.insert(std::pair("end", endIdx)); - srcs.insert(std::pair("strides", stridesIdx)); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - - -// Test data vectors -std::vector test0 = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f }; -std::vector test2 = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f }; -std::vector test5 = { 5.f, 6.f, 7.f, 8.f }; -std::vector test6 = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f }; -std::vector test8 = { 5.f, 4.f, 3.f, 2.f, 1.f }; -std::vector test9 = { 5.f, 4.f, 3.f, 2.f, 1.f, 0.f }; -std::vector test10 = { 5.f, 4.f, 3.f }; -std::vector test11 = { 0.f, 2.f, 4.f, 6.f, 8.f }; -std::vector test12 = { 1.f, 3.f, 5.f, 7.f, 9.f }; -std::vector test13 = { 9.f, 8.f, 7.f, 6.f, 5.f, 4.f, 3.f, 2.f, 1.f, 0.f }; -std::vector test14 = { 9.f, 7.f, 5.f, 3.f, 1.f }; -std::vector test16 = { 0.f, 1.f, 3.f, 4.f }; -std::vector test17 = { 1.f, 4.f }; -std::vector test19 = { 0.f, 1.f, 2.f, 3.f }; -std::vector test20 = { 4.f, 5.f, 6.f, 7.f }; -/* -0. [0,1,2,3,4,5,6,7,8,9], shape=[10] -1. [0,1,2,3,4,5,6,7,8,9], shape=[10] -2. [0,1,2,3,4,5,6,7,8], shape=[9] -3. [0,1,2,3,4,5,6,7,8], shape=[9] -4. [0,1,2,3,4,5,6,7,8,9], shape=[10] -5. [5,6,7,8,9], shape=[5] -6. [0,1,2,3,4,5], shape=[6] -7. [5,6,7,8,9], shape=[5] -8. [5,4,3,2,1], shape=[5] -9. [5,4,3,2,1,0], shape=[6] -10. [5,4,3], shape=[3] -11. [0,2,4,6,8], shape=[5] -12. [1,3,5,7,9], shape=[5] -13. [9,8,7,6,5,4,3,2,1,0], shape=[10] -14. [9,7,5,3,1], shape=[5] -15. [[0,1,2,3,4,5,6,7,8,9]], shape=[1,10] -16. [[[0,1,2],[3,4,5]]], shape=[1,2,2] -17. [[[0,1,2],[3,4,5]]], shape=[1,2,1] -18. [[[0,1,2],[3,4,5]]], shape=[1,1,2,1] -19. [[[[0,1],[2,3]],[[4,5],[6,7]]]], shape=[1,2,2] -20. [[[[0,1],[2,3]],[[4,5],[6,7]]]], shape=[1,2,2] -21. 
[[[0,1,2],[3,4,5]]], shape=[1,1,2] -*/ - -TEST_P(MKLDNNCPUExtStridedSliceTests, DISABLED_TestsStridedSlice) {} -INSTANTIATE_TEST_CASE_P( - TestsStridedSlice, MKLDNNCPUExtStridedSliceTests, - ::testing::Values( -// Params: in_shape, dim_size, begin, end, stride, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, out_shape, reference -/* 0 */ strided_slice_test_params{ { 10 }, 1, {}, {}, {}, {}, {}, {}, {}, {}, { 10 }, test0 }, - strided_slice_test_params{ { 10 }, 1, {0}, {0}, {}, {}, {0}, {}, {}, {}, { 10 }, test0 }, - strided_slice_test_params{ { 10 }, 1,{ -1 },{ -1 },{},{ 0 },{},{},{},{},{ 9 }, test2 }, - strided_slice_test_params{ { 10 }, 1,{ 0 },{ -1 },{},{},{},{},{},{},{ 9 }, test2 }, - strided_slice_test_params{ { 10 }, 1,{ 0 },{ 10 },{},{},{},{},{},{},{ 10 }, test0 }, -/* 5 */ strided_slice_test_params{ { 10 }, 1,{ 5 },{ 10 },{},{},{},{},{},{},{ 5 }, test5 }, - strided_slice_test_params{ { 10 }, 1,{ 0 },{ 6 },{},{},{},{},{},{},{ 6 }, test6 }, - strided_slice_test_params{ { 10 }, 1,{ -5 },{ 10 },{},{},{},{},{},{},{ 5 }, test5 }, - strided_slice_test_params{ { 10 }, 1,{ -5 },{ 0 },{-1},{},{},{},{},{},{ 5 }, test8 }, - strided_slice_test_params{ { 10 }, 1,{ -5 },{ 0 },{ -1 },{},{0},{},{},{},{ 6 }, test9 }, -/* 10 */ strided_slice_test_params{ { 10 }, 1,{ -5 },{ 2 },{ -1 },{},{},{},{},{},{ 3 }, test10 }, - strided_slice_test_params{ { 10 }, 1,{ 0 },{ 0 },{ 2 },{},{0},{},{},{},{ 5 }, test11 }, - strided_slice_test_params{ { 10 }, 1,{ 1 },{ 0 },{ 2 },{},{ 0 },{},{},{},{ 5 }, test12 }, - strided_slice_test_params{ { 10 }, 1,{ -1 },{ 0 },{ -1 },{},{ 0 },{},{},{},{ 10 }, test13 }, - strided_slice_test_params{ { 10 }, 1,{ -1 },{ 0 },{ -2 },{},{ 0 },{},{},{},{ 5 }, test14 }, -/* 15 */ strided_slice_test_params{ { 10 }, 1,{ 0 },{ 10 },{},{},{},{},{1},{},{ 1, 10 }, test0 }, - strided_slice_test_params{ { 1, 2, 3 }, 2,{ 0, 0 },{ 1, 2 },{},{},{},{0, 1},{},{},{ 1, 2, 2 }, test16 }, - strided_slice_test_params{ { 1, 2, 3 }, 4,{ 0, 0, 0, 1 },{ 2, 3, 2, 2 },{},{},{},{},{ 0,0,1,0 },{ 0,0,0,1 },{ 1,2,1 }, test17 }, - strided_slice_test_params{ { 1, 2, 3 }, 3,{ 0, 0, 1 },{ 2, 2, 2 },{},{},{},{ 0, 1 },{ 1 },{},{ 1, 1, 2, 1 }, test17 }, - strided_slice_test_params{ { 1, 2, 2, 2 }, 4,{},{},{},{ 0,1,0,0 },{ 0,1,0,0 },{},{},{ 0,1 },{ 1,2,2 }, test19 }, -/* 20 */ strided_slice_test_params{ { 1, 2, 2, 2 }, 4,{ 0,1,0,0 },{ 1,2,2,2 },{},{ 0,1,0,0 },{ 0,1,0,0 },{},{},{ 0,1,0,0 },{ 1,2,2 }, test20 }, - strided_slice_test_params{ { 1, 2, 3 }, 3,{ 0, 0, 1 },{ 2, 2, 2 },{},{},{},{ 0, 1 },{ 1 },{ 0, 0, 1 },{ 1, 1, 2 }, test17 } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp deleted file mode 100644 index a72aa4ced8a..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp +++ /dev/null @@ -1,519 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -#include -#include - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -using namespace InferenceEngine; -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct topk_test_params { - SizeVector in_shape; - std::vector input_tensor; - int axis; - std::vector src_k; - std::string sort; - std::string mode; - SizeVector out_shape; - std::vector 
reference_val; - std::vector reference_idx; - - std::vector> comp; -}; - -static inline int count(std::vector dims, size_t start_ind, size_t end_ind) { - size_t count = 1; - for (size_t i = start_ind; i < end_ind; i++) - count *= dims[i]; - return static_cast(count); -} - -static inline int count(std::vector dims, size_t start_ind = 0) { - return count(dims, start_ind, dims.size()); -} - -static void ref_topk(InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst_data, InferenceEngine::TBlob &dst_indx, topk_test_params p) { - float *src_data = src.data(); - float* dst_val = dst_data.data(); - int* dst_idx = dst_indx.data(); - - int dim, axis_dist; - int src_k = static_cast(p.src_k[0]); - - - InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();; - int axis_ = p.axis; - if (axis_ < 0) - axis_ += src_dims.size(); - - size_t axis = static_cast(axis_); - - if (src_dims.size() < (1 + axis)) - FAIL() << " Incorrect input parameters dimensions and axis number!"; - - bool mode_max; - if (p.mode == "max") - mode_max = true; - else - mode_max = false; - - bool sort_value; - if (p.sort == "value") - sort_value = true; - else - sort_value = false; - - int j; - for (j = src_dims.size() - 1; j >= 0; j--) { - if (src_dims[j] != 1) break; - } - if (static_cast(j) == axis) { - dim = count(src_dims, static_cast(j)); - axis_dist = 1; - } else { - int axis_ = (p.axis < 0) ? p.axis + static_cast(src_dims.size()) : p.axis; - dim = static_cast(src_dims[axis_]); - axis_dist = count(src_dims, axis_) / dim; - } - - int num = count(src_dims) / dim; - std::vector > src_vector(src_k); - - for (int i = 0; i < num; ++i) { - src_vector[0] = std::make_pair(src_data[(i / axis_dist * dim) * axis_dist + i % axis_dist], 0); - for (j = 1; j < src_k; ++j) { - src_vector[j] = std::make_pair(src_data[(i / axis_dist * dim + j) * axis_dist + i % axis_dist], j); - if (mode_max) { - if (src_vector[j].first > src_vector[j - 1].first) - std::sort(src_vector.begin(), src_vector.begin() + j + 1, std::greater >()); - } else { - if (src_vector[j].first < src_vector[0].first) - std::sort(src_vector.begin(), src_vector.begin() + j + 1, std::less >()); - } - } - - for (; j < dim; ++j) { - float value = src_data[(i / axis_dist * dim + j) * axis_dist + i % axis_dist]; - if (mode_max) { - if (value > src_vector[src_k - 1].first) { - src_vector[src_k - 1] = std::make_pair(value, j); - std::sort(src_vector.begin(), src_vector.end(), std::greater >()); - } - } else { - if (value < src_vector[0].first) { - src_vector[0] = std::make_pair(value, j); - std::sort(src_vector.begin(), src_vector.end(), std::less >()); - } - } - } - - if (!sort_value) - std::sort(src_vector.begin(), src_vector.begin() + src_k, [](const pair &a, const pair &b) - { return (a.second < b.second); }); - - for (int j = 0; j < src_k; ++j) { - if (axis_dist != 1) { - // Produces max_val per axis - dst_val[(i / axis_dist * src_k + j) * axis_dist + i % axis_dist] = src_vector[j].first; - dst_idx[(i / axis_dist * src_k + j) * axis_dist + i % axis_dist] = src_vector[j].second; - } else { - // Produces max_ind and max_val - dst_val[i * src_k + j] = src_vector[j].first; - dst_idx[i * src_k + j] = src_vector[j].second; - } - } - } -} - - -class MKLDNNCPUExtTopKTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - - - - - - - 1 - - - - - - - - _IN_ - - - 1 - - - - - _OUT_ - - - _OUT_ - - - - - - - - - -)V0G0N"; - - std::string getModel(topk_test_params p) { - std::string model = model_t; - std::string in_shape; - 
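// --- Editor's illustrative sketch (not part of the removed test file) ------
// One-row model of what ref_topk() above computes: the k largest ("max") or
// smallest ("min") values of a row together with their original indices,
// ordered either by value or by index.  The real reference additionally
// strides over an arbitrary axis of an N-D blob; names below are invented
// for illustration only.
#include <algorithm>
#include <string>
#include <utility>
#include <vector>

static std::vector<std::pair<float, int>> topk_row_sketch(const std::vector<float>& row, size_t k,
                                                          const std::string& mode,
                                                          const std::string& sort) {
    std::vector<std::pair<float, int>> v;
    for (size_t i = 0; i < row.size(); ++i)
        v.emplace_back(row[i], static_cast<int>(i));
    // Stable sort keeps the lower index first among equal values, matching
    // the tie-breaking visible in the all-equal reference cases below.
    if (mode == "max")
        std::stable_sort(v.begin(), v.end(),
                         [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
                             return a.first > b.first;
                         });
    else
        std::stable_sort(v.begin(), v.end(),
                         [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
                             return a.first < b.first;
                         });
    if (k < v.size())
        v.resize(k);
    if (sort == "index")  // sort == "value" keeps the value ordering from above
        std::sort(v.begin(), v.end(),
                  [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
                      return a.second < b.second;
                  });
    return v;
}
// For the first parameter set below ({3, 4} input filled with 0..11,
// axis -1, k = 1, "value"/"max"), each row keeps its last element:
// values {3, 7, 11}, indices {3, 3, 3}.
// ----------------------------------------------------------------------------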
std::string out_shape; - - for (size_t i = 0; i < p.out_shape.size(); i++) { - out_shape += ""; - out_shape += std::to_string(p.out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_OUT_", out_shape); - - for (auto& dct : p.in_shape) { - in_shape += ""; - in_shape += std::to_string(dct) + "\n"; - } - - REPLACE_WITH_STR(model, "_IN_", in_shape); - REPLACE_WITH_STR(model, "_SORT_", p.sort); - REPLACE_WITH_STR(model, "_MODE_", p.mode); - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - topk_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core ie; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = ie.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - auto it = out.begin(); - std::pair item0 = *it; - std::pair item1 = *(++it); - - InferenceEngine::TBlob::Ptr output0; - output0 = InferenceEngine::make_shared_blob(item0.second->getTensorDesc()); - output0->allocate(); - outputBlobs[item0.first] = output0; - - InferenceEngine::TBlob::Ptr output1; - output1 = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, p.out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.out_shape) }); - output1->allocate(); - outputBlobs[item1.first] = output1; - - // Input Data - InferenceEngine::Blob::Ptr src; - src = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) }); - src->allocate(); - if (p.input_tensor.size()) - memcpy(src->buffer(), &p.input_tensor[0], sizeof(float)*p.input_tensor.size()); - else - fill_data_dbgval(src->buffer(), src->size()); - auto * srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("value", src)); - - InferenceEngine::Blob::Ptr seq_lengthsIdx; - InferenceEngine::SizeVector seq_lengths_dim(1, 1); - seq_lengthsIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) }); - seq_lengthsIdx->allocate(); - memcpy(static_cast(seq_lengthsIdx->buffer()), &p.src_k[0], sizeof(int32_t)); - auto * seq_lengthsIdxPtr = dynamic_cast*>(seq_lengthsIdx.get()); - if (seq_lengthsIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("src_k", seq_lengthsIdx)); - - // Output Reference - InferenceEngine::TBlob dst_data_ref(item0.second->getTensorDesc()); - dst_data_ref.allocate(); - InferenceEngine::TBlob dst_indx_ref(item1.second->getTensorDesc()); - dst_indx_ref.allocate(); - ref_topk(*srcPtr, dst_data_ref, dst_indx_ref, p); - - // Infer - graph.Infer(srcs, outputBlobs); - compare(*output0, dst_data_ref); - for (int i = 0; i < dst_indx_ref.size(); i++) - if (dst_indx_ref.data()[i] != (*output1).data()[i]) - FAIL() << "The difference between res_idx[i] and reference_idx[i]"; - - for (int i = 0; i < p.reference_val.size(); i++) { - if(p.reference_val.data()[i] != (*output0).data()[i]) - FAIL() << "The difference between res_val[i] and reference_val[i]"; - } - - for (int i = 0; i < p.reference_idx.size(); i++) { - if (p.reference_idx.data()[i] != (*output1).data()[i]) - 
FAIL() << "The difference between res_idx[i] and reference_idx[i]"; - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtTopKTests, TestsTopK) {} - -INSTANTIATE_TEST_CASE_P( - TestsTopK, MKLDNNCPUExtTopKTests, - ::testing::Values( -// Params: in_shape, input_tensor, axis, src_k, sort, mode, out_shape, reference_val, reference_idx - topk_test_params{ { 3, 4 },{}, -1,{ 1 }, "value", "max",{ 3, 1 },{ 3,7,11 },{ 3,3,3 } }, - topk_test_params{ { 3, 4 },{}, 0,{ 1 }, "value", "max",{ 1, 4 },{ 8,9,10,11 },{ 2,2,2,2 } }, - topk_test_params{ { 3, 4 },{}, -1,{ 1 }, "value", "min",{ 3, 1 },{ 0,4,8 },{ 0,0,0 } }, - topk_test_params{ { 3, 4 },{}, 0,{ 1 }, "value", "min",{ 1, 4 },{ 0,1,2,3 },{ 0,0,0,0 } }, - topk_test_params{ { 2, 3, 128, 256 },{}, 1,{ 1 }, "value", "max",{ 2, 1, 128, 256 },{},{} }, - topk_test_params{ { 3, 5, 128, 256 },{}, 1,{ 1 }, "index", "max",{ 3, 1, 128, 256 },{},{} }, - topk_test_params{ { 1, 3, 129, 257 },{}, 1,{ 1 }, "value", "max",{ 1, 1, 129, 257 },{},{} }, - topk_test_params{ { 2, 5, 129, 257 },{}, 1,{ 1 }, "index", "max",{ 2, 1, 129, 257 },{},{} }, - topk_test_params{ { 3, 4 },{}, -1,{ 3 }, "value", "max",{ 3, 3 },{ 3,2,1,7,6,5,11,10,9 },{ 3,2,1,3,2,1,3,2,1 } }, - topk_test_params{ { 3, 4 },{}, -1,{ 3 }, "value", "min",{ 3, 3 },{ 0,1,2,4,5,6,8,9,10 },{ 0,1,2,0,1,2,0,1,2 } }, - topk_test_params{ { 1, 20, 128, 128 },{}, 1,{ 3 }, "value", "max",{ 1, 3, 128, 128 },{},{} }, - topk_test_params{ { 1, 20, 128, 128 },{}, 1,{ 3 }, "index", "min",{ 1, 3, 128, 128 },{},{} }, - topk_test_params{ { 1, 20, 128, 128 },{}, 1,{ 18 }, "value", "min",{ 1, 18, 128, 128 },{},{} }, - topk_test_params{ { 1, 20, 129, 129 },{}, 1,{ 3 }, "value", "max",{ 1, 3, 129, 129 },{},{} }, - topk_test_params{ { 1, 2, 2, 4 },{}, 3,{ 3 }, "value", "max",{ 1, 2, 2, 3 },{},{} }, - topk_test_params{ { 1, 2, 2, 4 },{}, 3,{ 3 }, "index", "max",{ 1, 2, 2, 3 },{},{} }, - topk_test_params{ { 1, 2, 2, 4 },{}, 3,{ 3 }, "value", "min",{ 1, 2, 2, 3 },{},{} }, - topk_test_params{ { 1, 2, 2, 4 },{}, 3,{ 3 }, "index", "min",{ 1, 2, 2, 3 },{},{} }, - topk_test_params{ { 1, 2, 2, 4 },{}, 3,{ 1 }, "value", "max",{ 1, 2, 2, 1 },{},{} }, - topk_test_params{ { 1, 2, 2, 4 },{}, 3,{ 1 }, "index", "max",{ 1, 2, 2, 1 },{},{} }, - topk_test_params{ { 1, 2, 4, 2 },{}, 2,{ 3 }, "value", "max",{ 1, 2, 3, 2 },{},{} }, - topk_test_params{ { 1, 2, 4, 2 },{}, 2,{ 3 }, "index", "max",{ 1, 2, 3, 2 },{},{} }, - topk_test_params{ { 1, 2, 4, 2 },{}, 2,{ 3 }, "value", "min",{ 1, 2, 3, 2 },{},{} }, - topk_test_params{ { 1, 2, 4, 2 },{}, 2,{ 3 }, "index", "min",{ 1, 2, 3, 2 },{},{} }, - topk_test_params{ { 1, 2, 2, 4 },{3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3}, 3,{ 3 }, "index", "min",{ 1, 2, 2, 3 },{3,3,3,3,3,3,3,3,3,3,3,3},{0,1,2,0,1,2,0,1,2,0,1,2} }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "index", "max",{ 1, 2, 2, 3 },{ 3,3,3,3,3,3,3,3,3,3,3,3 },{ 0,1,2,0,1,2,0,1,2,0,1,2 } }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "value", "min",{ 1, 2, 2, 3 },{ 3,3,3,3,3,3,3,3,3,3,3,3 },{ 0,1,2,0,1,2,0,1,2,0,1,2 } }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "value", "max",{ 1, 2, 2, 3 },{ 3,3,3,3,3,3,3,3,3,3,3,3 },{ 0,1,2,0,1,2,0,1,2,0,1,2 } }, - topk_test_params{ { 1, 20, 32, 32 },{}, 1,{ 18 }, "index", "max",{ 1, 18, 32, 32 },{},{} }, - topk_test_params{ { 1, 20, 129, 129 },{}, 1,{ 18 }, "index", "max",{ 1, 18, 129, 129 },{},{} }, - topk_test_params{ { 1, 20, 32, 32 },{}, 1,{ 18 }, "index", 
"min",{ 1, 18, 32, 32 },{},{} }, - topk_test_params{ { 1, 20, 129, 129 },{}, 1,{ 18 }, "index", "min",{ 1, 18, 129, 129 },{},{} }, - topk_test_params{ { 1, 20, 129, 129 },{}, 1,{ 18 }, "none", "min",{ 1, 18, 129, 129 },{},{} } - )); - - -class MKLDNNCPUExtTopK1OutTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - - - - - - - - - - - - - _IN_ - - - - - - - _OUT_ - - - - - - - - - -)V0G0N"; - - std::string getModel(topk_test_params p) { - std::string model = model_t; - std::string in_shape; - std::string out_shape; - - for (size_t i = 0; i < p.out_shape.size(); i++) { - out_shape += ""; - out_shape += std::to_string(p.out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_OUT_", out_shape); - - for (auto& dct : p.in_shape) { - in_shape += ""; - in_shape += std::to_string(dct) + "\n"; - } - - REPLACE_WITH_STR(model, "_IN_", in_shape); - REPLACE_WITH_STR(model, "_SORT_", p.sort); - REPLACE_WITH_STR(model, "_MODE_", p.mode); - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - if (p.reference_val.size()) - REPLACE_WITH_STR(model, "_PRECISION_", "FP32"); - else - REPLACE_WITH_STR(model, "_PRECISION_", "I32"); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - topk_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - // Input Data - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, p.in_shape, - InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) }); - src->allocate(); - if (p.input_tensor.size()) - memcpy(src->buffer(), &p.input_tensor[0], sizeof(float)*p.input_tensor.size()); - else - fill_data_dbgval(src->buffer(), src->size()); - auto * srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("value", src)); - InferenceEngine::Blob::Ptr seq_lengthsIdx = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, {}, - InferenceEngine::TensorDesc::getLayoutByDims({})}); - seq_lengthsIdx->allocate(); - memcpy(static_cast(seq_lengthsIdx->buffer()), &p.src_k[0], sizeof(int32_t)); - auto * seq_lengthsIdxPtr = dynamic_cast*>(seq_lengthsIdx.get()); - if (seq_lengthsIdxPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - srcs.insert(std::pair("src_k", seq_lengthsIdx)); - - - // Output Data - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - auto it = out.begin(); - std::pair item = *it; - - if (p.reference_val.size()) { - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - // Infer - graph.Infer(srcs, outputBlobs); - for (int i = 0; i < p.reference_val.size(); i++) { - if (p.reference_val.data()[i] != (*output).data()[i]) - FAIL() << "The difference between res_val[i] and reference_val[i]"; - } - } else { - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::I32, p.out_shape, - InferenceEngine::TensorDesc::getLayoutByDims(p.out_shape) }); - output->allocate(); - outputBlobs[item.first] = output; - - 
// Infer - graph.Infer(srcs, outputBlobs); - for (int i = 0; i < p.reference_idx.size(); i++) { - if (p.reference_idx.data()[i] != (*output).data()[i]) - FAIL() << "The difference between res_val[i] and reference_idx[i]"; - } - } - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtTopK1OutTests, TestsTopK) {} - -INSTANTIATE_TEST_CASE_P( - TestsTopK1Out, MKLDNNCPUExtTopK1OutTests, - ::testing::Values( - // Params: in_shape, input_tensor, axis, src_k, sort, mode, out_shape, reference_val, reference_idx - topk_test_params{ { 3, 4 },{}, -1,{ 1 }, "value", "max",{ 3, 1 },{ 3,7,11 },{} }, - topk_test_params{ { 3, 4 },{}, -1,{ 1 }, "value", "max",{ 3, 1 },{},{ 3,3,3 } }, - topk_test_params{ { 3, 4 },{}, 0,{ 1 }, "value", "max",{ 1, 4 },{ 8,9,10,11 },{} }, - topk_test_params{ { 3, 4 },{}, 0,{ 1 }, "value", "max",{ 1, 4 },{},{ 2,2,2,2 } }, - topk_test_params{ { 3, 4 },{}, -1,{ 1 }, "value", "min",{ 3, 1 },{ 0,4,8 },{} }, - topk_test_params{ { 3, 4 },{}, -1,{ 1 }, "value", "min",{ 3, 1 },{},{ 0,0,0 } }, - topk_test_params{ { 3, 4 },{}, 0,{ 1 }, "value", "min",{ 1, 4 },{ 0,1,2,3 },{} }, - topk_test_params{ { 3, 4 },{}, 0,{ 1 }, "value", "min",{ 1, 4 },{},{ 0,0,0,0 } }, - topk_test_params{ { 3, 4 },{}, -1,{ 3 }, "value", "max",{ 3, 3 },{ 3,2,1,7,6,5,11,10,9 },{} }, - topk_test_params{ { 3, 4 },{}, -1,{ 3 }, "value", "max",{ 3, 3 },{},{ 3,2,1,3,2,1,3,2,1 } }, - topk_test_params{ { 3, 4 },{}, -1,{ 3 }, "value", "min",{ 3, 3 },{ 0,1,2,4,5,6,8,9,10 },{} }, - topk_test_params{ { 3, 4 },{}, -1,{ 3 }, "value", "min",{ 3, 3 },{},{ 0,1,2,0,1,2,0,1,2 } }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "index", "min",{ 1, 2, 2, 3 },{ 3,3,3,3,3,3,3,3,3,3,3,3 },{} }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "index", "min",{ 1, 2, 2, 3 },{},{ 0,1,2,0,1,2,0,1,2,0,1,2 } }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "index", "max",{ 1, 2, 2, 3 },{ 3,3,3,3,3,3,3,3,3,3,3,3 },{} }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "index", "max",{ 1, 2, 2, 3 },{},{ 0,1,2,0,1,2,0,1,2,0,1,2 } }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "value", "min",{ 1, 2, 2, 3 },{ 3,3,3,3,3,3,3,3,3,3,3,3 },{} }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "value", "min",{ 1, 2, 2, 3 },{},{ 0,1,2,0,1,2,0,1,2,0,1,2 } }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "value", "max",{ 1, 2, 2, 3 },{ 3,3,3,3,3,3,3,3,3,3,3,3 },{} }, - topk_test_params{ { 1, 2, 2, 4 },{ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }, 3,{ 3 }, "value", "max",{ 1, 2, 2, 3 },{},{ 0,1,2,0,1,2,0,1,2,0,1,2 } } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp deleted file mode 100644 index da25e3ee7c9..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct unique_test_params { - std::string model; - - std::string precision; - - 
std::string sorted; - std::string return_inverse; - std::string return_counts; - - InferenceEngine::SizeVector input_shape; - std::vector input_value; - - InferenceEngine::SizeVector output_uniques_shape; - InferenceEngine::SizeVector output_indices_shape; - InferenceEngine::SizeVector output_counts_shape; - - std::vector output_uniques_value_ref; - std::vector output_indices_value_ref; - std::vector output_counts_value_ref; - - size_t num_prim_desc; - int selectedType; - - std::vector> comp; -}; - -class MKLDNNCPUExtUniqueTests : public TestsCommon, public WithParamInterface { - std::string getModel(unique_test_params p) { - std::string model = p.model; - - std::string input_shape; - std::string output_uniques_shape; - std::string output_indices_shape; - std::string output_counts_shape; - - for (auto& shape : p.input_shape) { - input_shape += ""; - input_shape += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.output_uniques_shape) { - output_uniques_shape += ""; - output_uniques_shape += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.output_indices_shape) { - output_indices_shape += ""; - output_indices_shape += std::to_string(shape) + "\n"; - } - - for (auto& shape : p.output_counts_shape) { - output_counts_shape += ""; - output_counts_shape += std::to_string(shape) + "\n"; - } - - REPLACE_WITH_STR(model, "_SORTED_", p.sorted); - REPLACE_WITH_STR(model, "_INPUT_SHAPE_", input_shape); - REPLACE_WITH_STR(model, "_OUTPUT_UNIQUES_SHAPE_", output_uniques_shape); - REPLACE_WITH_STR(model, "_OUTPUT_INDICES_SHAPE_", output_indices_shape); - REPLACE_WITH_STR(model, "_OUTPUT_COUNTS_SHAPE_", output_counts_shape); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - unique_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - - for (auto &node : nodes) { - if (node->getName() == "Unique") { - ASSERT_EQ(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - // prepare input blob and input blob map - InferenceEngine::Blob::Ptr input = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.input_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.input_shape) }); - input->allocate(); - auto *input_ptr = dynamic_cast*>(input.get()); - std::copy(p.input_value.begin(), p.input_value.end(), (float *)input_ptr->data()); - InferenceEngine::BlobMap input_blob_map; - input_blob_map["InputValues"] = input; - - // prepare output blob map - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - InferenceEngine::BlobMap output_blob_map; - for (auto iter = out.begin(); iter != out.end(); iter++) { - std::pair item = *iter; - InferenceEngine::Blob::Ptr output_blob_ptr = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output_blob_ptr->allocate(); - output_blob_map[item.first] = output_blob_ptr; - } - - // prepare blobs 
with reference data - InferenceEngine::Blob::Ptr output_uniques_blob_ref = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.output_uniques_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_uniques_shape) }); - output_uniques_blob_ref->allocate(); - auto *output_uniques_blob_ref_ptr = dynamic_cast*>(output_uniques_blob_ref.get()); - std::copy(p.output_uniques_value_ref.begin(), p.output_uniques_value_ref.end(), (float *)output_uniques_blob_ref_ptr->data()); - - InferenceEngine::Blob::Ptr output_indices_blob_ref = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.output_indices_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_indices_shape) }); - output_indices_blob_ref->allocate(); - auto *output_indices_blob_ref_ptr = dynamic_cast*>(output_indices_blob_ref.get()); - std::copy(p.output_indices_value_ref.begin(), p.output_indices_value_ref.end(), (float *)output_indices_blob_ref_ptr->data()); - - InferenceEngine::Blob::Ptr output_counts_blob_ref = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - p.output_counts_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.output_counts_shape) }); - output_counts_blob_ref->allocate(); - auto *output_counts_blob_ref_ptr = dynamic_cast*>(output_counts_blob_ref.get()); - std::copy(p.output_counts_value_ref.begin(), p.output_counts_value_ref.end(), (float *)output_counts_blob_ref_ptr->data()); - - // infer - graph.Infer(input_blob_map, output_blob_map); - - // check the result - auto iter = out.begin(); - compare(*output_blob_map[iter->first], *output_uniques_blob_ref, 0.0f); - if (p.return_inverse == "true") { - iter++; - compare(*output_blob_map[iter->first], *output_indices_blob_ref, 0.0f); - } - if (p.return_counts == "true") { - iter++; - compare(*output_blob_map[iter->first], *output_counts_blob_ref, 0.0f); - } - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNCPUExtUniqueTests, TestsUnique) {} - -// model 1 that contains one Unique layer with two outputs: unique elements, indices -std::string model1 = R"V0G0N( - - - - - - _INPUT_SHAPE_ - - - - - - - - _INPUT_SHAPE_ - - - - - _OUTPUT_UNIQUES_SHAPE_ - - - _OUTPUT_INDICES_SHAPE_ - - - - - - - - -)V0G0N"; - -// model 2 that contains one Unique layer with three outputs: unique elements, indices, counts -std::string model2 = R"V0G0N( - - - - - - _INPUT_SHAPE_ - - - - - - - - _INPUT_SHAPE_ - - - - - _OUTPUT_UNIQUES_SHAPE_ - - - _OUTPUT_INDICES_SHAPE_ - - - _OUTPUT_COUNTS_SHAPE_ - - - - - - - - -)V0G0N"; - -// case 1 - input with 10 elements where some of them repeat, non-sorted -InferenceEngine::SizeVector input_shape_case1 = { 10 }; -std::vector input_value_case1 = { 8.f, 1.f, 2.f, 1.f, 8.f, 5.f, 1.f, 5.f, 0.f, 0.f }; -InferenceEngine::SizeVector output_uniques_shape_case1 = { 10 }; -InferenceEngine::SizeVector output_indicess_shape_case1 = { 10 }; -InferenceEngine::SizeVector output_counts_shape_case1 = { 10 }; -std::vector output_uniques_value_ref_case1 = { 8.f, 1.f, 2.f, 5.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f }; -std::vector output_indices_value_ref_case1 = { 0.f, 1.f, 2.f, 1.f, 0.f, 3.f, 1.f, 3.f, 4.f, 4.f }; -std::vector output_counts_value_ref_case1 = { 2.f, 3.f, 1.f, 2.f, 2.f, 0.f, 0.f, 0.f, 0.f, 0.f }; - -// case 2 - input with 10 elements where all of them are unique, non-sorted -InferenceEngine::SizeVector input_shape_case2 = { 10 }; -std::vector input_value_case2 = { 8.f, 1.f, 2.f, 3.f, 10.f, 5.f, 12.f, 15.f, 0.f, 100.f }; -InferenceEngine::SizeVector 
output_uniques_shape_case2 = { 10 }; -InferenceEngine::SizeVector output_indicess_shape_case2 = { 10 }; -InferenceEngine::SizeVector output_counts_shape_case2 = { 10 }; -std::vector output_uniques_value_ref_case2 = { 8.f, 1.f, 2.f, 3.f, 10.f, 5.f, 12.f, 15.f, 0.f, 100.f }; -std::vector output_indices_value_ref_case2 = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f }; -std::vector output_counts_value_ref_case2 = { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f }; - -// case 3 - input with 10 elements where all of them are the same, non-sorted -InferenceEngine::SizeVector input_shape_case3 = { 10 }; -std::vector input_value_case3 = { 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f }; -InferenceEngine::SizeVector output_uniques_shape_case3 = { 10 }; -InferenceEngine::SizeVector output_indicess_shape_case3 = { 10 }; -InferenceEngine::SizeVector output_counts_shape_case3 = { 10 }; -std::vector output_uniques_value_ref_case3 = { 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f }; -std::vector output_indices_value_ref_case3 = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f }; -std::vector output_counts_value_ref_case3 = { 10.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f }; - -// case 4 - input with 10 elements where some of them repeat, sorted -InferenceEngine::SizeVector input_shape_case4 = { 10 }; -std::vector input_value_case4 = { 8.f, 1.f, 2.f, 1.f, 8.f, 5.f, 1.f, 5.f, 0.f, 0.f }; -InferenceEngine::SizeVector output_uniques_shape_case4 = { 10 }; -InferenceEngine::SizeVector output_indicess_shape_case4 = { 10 }; -InferenceEngine::SizeVector output_counts_shape_case4 = { 10 }; -std::vector output_uniques_value_ref_case4 = { 0.f, 1.f, 2.f, 5.f, 8.f, 8.f, 8.f, 8.f, 8.f, 8.f }; -std::vector output_indices_value_ref_case4 = { 4.f, 1.f, 2.f, 1.f, 4.f, 3.f, 1.f, 3.f, 0.f, 0.f }; -std::vector output_counts_value_ref_case4 = { 2.f, 3.f, 1.f, 2.f, 2.f, 0.f, 0.f, 0.f, 0.f, 0.f }; - -// case 5 - input with 10 elements where all of them are unique, sorted -InferenceEngine::SizeVector input_shape_case5 = { 10 }; -std::vector input_value_case5 = { 8.f, 1.f, 2.f, 3.f, 10.f, 5.f, 12.f, 15.f, 0.f, 100.f }; -InferenceEngine::SizeVector output_uniques_shape_case5 = { 10 }; -InferenceEngine::SizeVector output_indicess_shape_case5 = { 10 }; -InferenceEngine::SizeVector output_counts_shape_case5 = { 10 }; -std::vector output_uniques_value_ref_case5 = { 0.f, 1.f, 2.f, 3.f, 5.f, 8.f, 10.f, 12.f, 15.f, 100.f }; -std::vector output_indices_value_ref_case5 = { 5.f, 1.f, 2.f, 3.f, 6.f, 4.f, 7.f, 8.f, 0.f, 9.f }; -std::vector output_counts_value_ref_case5 = { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f }; - -INSTANTIATE_TEST_CASE_P( - TestsUnique, MKLDNNCPUExtUniqueTests, - ::testing::Values( - // case 0 - model1, sorted="false", input with 10 elements where some of them repeat - unique_test_params { - model1, "FP32", "false", "true", "false", input_shape_case1, input_value_case1, - output_uniques_shape_case1, output_indicess_shape_case1, output_counts_shape_case1, - output_uniques_value_ref_case1, output_indices_value_ref_case1, output_counts_value_ref_case1, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 1 - model1, sorted="false", input with 10 elements where all of them are unique - unique_test_params{ - model1, "FP32", "false", "true", "false", input_shape_case2, input_value_case2, - output_uniques_shape_case2, output_indicess_shape_case2, output_counts_shape_case2, - output_uniques_value_ref_case2, output_indices_value_ref_case2, output_counts_value_ref_case2, - 1, 
MKLDNNPlugin::impl_desc_type::unknown - }, - // case 2 - model1, sorted="false", input with 10 elements where all of them are the same - unique_test_params{ - model1, "FP32", "false", "true", "false", input_shape_case3, input_value_case3, - output_uniques_shape_case3, output_indicess_shape_case3, output_counts_shape_case3, - output_uniques_value_ref_case3, output_indices_value_ref_case3, output_counts_value_ref_case3, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 3 - model1, sorted="true", input with 10 elements where some of them repeat - unique_test_params{ - model1, "FP32", "true", "true", "false", input_shape_case4, input_value_case4, - output_uniques_shape_case4, output_indicess_shape_case4, output_counts_shape_case4, - output_uniques_value_ref_case4, output_indices_value_ref_case4, output_counts_value_ref_case4, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 4 - model1, sorted="true", input with 10 elements where all of them are unique - unique_test_params{ - model1, "FP32", "true", "true", "false", input_shape_case5, input_value_case5, - output_uniques_shape_case5, output_indicess_shape_case5, output_counts_shape_case5, - output_uniques_value_ref_case5, output_indices_value_ref_case5, output_counts_value_ref_case5, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 5 - model2, sorted="false", input with 10 elements where some of them repeat - unique_test_params{ - model2, "FP32", "false", "true", "true", input_shape_case1, input_value_case1, - output_uniques_shape_case1, output_indicess_shape_case1, output_counts_shape_case1, - output_uniques_value_ref_case1, output_indices_value_ref_case1, output_counts_value_ref_case1, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 6 - model2, sorted="false", input with 10 elements where all of them are unique - unique_test_params{ - model2, "FP32", "false", "true", "true", input_shape_case2, input_value_case2, - output_uniques_shape_case2, output_indicess_shape_case2, output_counts_shape_case2, - output_uniques_value_ref_case2, output_indices_value_ref_case2, output_counts_value_ref_case2, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 7 - model2, sorted="false", input with 10 elements where all of them are the same - unique_test_params{ - model2, "FP32", "false", "true", "true", input_shape_case3, input_value_case3, - output_uniques_shape_case3, output_indicess_shape_case3, output_counts_shape_case3, - output_uniques_value_ref_case3, output_indices_value_ref_case3, output_counts_value_ref_case3, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 8 - model2, sorted="true", input with 10 elements where some of them repeat - unique_test_params{ - model2, "FP32", "true", "true", "true", input_shape_case4, input_value_case4, - output_uniques_shape_case4, output_indicess_shape_case4, output_counts_shape_case4, - output_uniques_value_ref_case4, output_indices_value_ref_case4, output_counts_value_ref_case4, - 1, MKLDNNPlugin::impl_desc_type::unknown - }, - // case 9 - model2, sorted="true", input with 10 elements where all of them are unique - unique_test_params{ - model2, "FP32", "true", "true", "true", input_shape_case5, input_value_case5, - output_uniques_shape_case5, output_indicess_shape_case5, output_counts_shape_case5, - output_uniques_value_ref_case5, output_indices_value_ref_case5, output_counts_value_ref_case5, - 1, MKLDNNPlugin::impl_desc_type::unknown - } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp 
b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp deleted file mode 100644 index a2f9f7b8d8c..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp +++ /dev/null @@ -1,422 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -constexpr auto eltwise_relu = mkldnn::algorithm::eltwise_relu; -constexpr auto eltwise_elu = mkldnn::algorithm::eltwise_elu; -constexpr auto eltwise_logistic = mkldnn::algorithm::eltwise_logistic; -constexpr auto eltwise_bounded_relu = mkldnn::algorithm::eltwise_bounded_relu; -constexpr auto eltwise_tanh = mkldnn::algorithm::eltwise_tanh; - -struct activation_test_params { - mkldnn::algorithm alg; - float alpha; - float beta; - - // Formats: NCHW, NCDHW - vector dims; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - std::vector preferTypes; - - std::vector> comp; -}; - -template inline T relu_fwd(T s, A alpha) { - return s > 0 ? s : static_cast(s * alpha); -} - -template T elu_fwd(T s, A alpha) { - return s > 0 ? s : static_cast(alpha * (::expf(s) - 1)); -} - -template -T logistic_fwd(T s) { - T v = ::expf(s); - return v / (v + 1); -} - -template -T bounded_relu_fwd(T s, A alpha) { - s = s > 0 ? s : 0; - return s > alpha ? (T)(alpha) : s; -} - -template T tanh_fwd(T s) { - return static_cast(::tanhf((float)s)); -} - -template -void ref_activation(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst, activation_test_params prm) { - InferenceEngine::SizeVector dims = src.getTensorDesc().getDims(); - auto dims_size = dims.size(); - - size_t IW = dims[dims_size - 1]; - size_t IH = dims[dims_size - 2]; - size_t ID = dims_size == 5 ? 
dims[dims_size - 3] : 1u; - size_t IC = dims[1]; - size_t MB = dims[0]; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for(int mb = 0; mb < MB; mb++) { - for(int c = 0; c < IC; c++) { - for(int d = 0; d < ID; d++) { - for(int h = 0; h < IH; h++) { - for(int w = 0; w < IW; w++) { - int idx = mb * IC * ID * IH * IW - + c * ID * IH * IW - + d * IH * IW - + h * IW - + w; - - switch (prm.alg) { - case eltwise_relu: dst_data[idx] = relu_fwd(src_data[idx], prm.alpha); break; - case eltwise_elu: dst_data[idx] = elu_fwd(src_data[idx], prm.alpha); break; - case eltwise_logistic: dst_data[idx] = logistic_fwd(src_data[idx]); break; - case eltwise_bounded_relu: dst_data[idx] = bounded_relu_fwd(src_data[idx], prm.alpha); break; - case eltwise_tanh: dst_data[idx] = tanh_fwd(src_data[idx]); break; - default: assert(!"unknown alg_kind"); - } - } - } - } - } - } -} - -class MKLDNNGraphActivationTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - -)V0G0N"; - -protected: - virtual void TearDown() { - } - - std::string getModel(activation_test_params p) { - std::string model = model_t; - auto dims_size = p.dims.size(); - - switch (dims_size) { - case 3: - REMOVE_LINE(model, "_IH_"); - case 4: - REMOVE_LINE(model, "_ID_"); - } - - switch (p.alg) { - case eltwise_relu: REPLACE_WITH_STR(model, "_LT_", "ReLU"); break; - case eltwise_elu: REPLACE_WITH_STR(model, "_LT_", "ELU"); break; - case eltwise_logistic: REPLACE_WITH_STR(model, "_LT_", "Sigmoid"); break; - case eltwise_bounded_relu: REPLACE_WITH_STR(model, "_LT_", "ReLU6"); break; - case eltwise_tanh: REPLACE_WITH_STR(model, "_LT_", "Activation"); break; - default: assert(!"unknown alg_kind"); - } - - string P1, P2; - if (p.alg == eltwise_relu) { - P1 = string("negative_slope=\"") + to_string_c_locale(p.alpha) + string("\""); - P2 = string("beta=\"") + to_string_c_locale(p.beta) + string("\""); - } else if (p.alg == eltwise_bounded_relu) { - P1 = string("n=\"") + to_string_c_locale(p.alpha) + string("\""); - P2 = string("beta=\"") + to_string_c_locale(p.beta) + string("\""); - } else if (p.alg == eltwise_tanh) { - P1 = string("type=\"tanh\""); - } else { - P1 = string("alpha=\"") + to_string_c_locale(p.alpha) + string("\""); - P2 = string("beta=\"") + to_string_c_locale(p.beta) + string("\""); - } - REPLACE_WITH_STR(model, "_P1_", P1); - REPLACE_WITH_STR(model, "_P2_", P2); - - REPLACE_WITH_NUM(model, "_IW_", p.dims[dims_size - 1]); - REPLACE_WITH_NUM(model, "_IC_", p.dims[1]); - REPLACE_WITH_NUM(model, "_IN_", p.dims[0]); - switch (dims_size) { - case 5: - REPLACE_WITH_NUM(model, "_ID_", p.dims[dims_size - 3]); - case 4: - REPLACE_WITH_NUM(model, "_IH_", p.dims[dims_size - 2]); - } - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - - return model; - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - activation_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = 
graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Activation) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - InferenceEngine::SizeVector dims_src = p.dims; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_activation(*srcPtr, dst_ref, p); - - compare(*output, dst_ref, 0.0005f); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphActivationTests, TestsActivation) {} - -INSTANTIATE_TEST_CASE_P( - TestsActivation, MKLDNNGraphActivationTests, - ::testing::Values( - activation_test_params{eltwise_relu, 0.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_relu, 0.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_relu, 0.5f, 0.5f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_relu, 0.5f, 0.5f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_elu, 0.5f, 0.5f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_elu, 0.5f, 0.5f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_elu, 1.0f, 1.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_elu, 1.0f, 1.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_logistic, 0.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_logistic, 0.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_bounded_relu, 6.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_bounded_relu, 6.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_bounded_relu, 0.1f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_bounded_relu, 0.1f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_relu, 0.0f, 0.0f, {1, 32, 
128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_relu, 0.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_relu, 0.5f, 0.5f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_relu, 0.5f, 0.5f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_elu, 0.5f, 0.5f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_elu, 0.5f, 0.5f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_elu, 1.0f, 1.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_elu, 1.0f, 1.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_logistic, 0.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_logistic, 0.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_bounded_relu, 6.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_bounded_relu, 6.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_bounded_relu, 0.1f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_bounded_relu, 0.1f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // 5D - activation_test_params{eltwise_tanh, 0.f, 0.f, {1, 1, 64, 64, 64}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}} - )); - -class MKLDNNGraphDynBatchActivationTests: public MKLDNNGraphActivationTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - activation_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.dims[0]; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = p.dims; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, 
layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkActivation = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Activation; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkActivation); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkActivation); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchActivationTests, TestsDynBatchActivation) {} - - -INSTANTIATE_TEST_CASE_P( - TestsDynBatchActivation, MKLDNNGraphDynBatchActivationTests, - ::testing::Values( - activation_test_params{eltwise_relu, 0.0f, 0.0f, {2, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_relu, 0.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_relu, 0.5f, 0.5f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_elu, 0.5f, 0.5f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_elu, 1.0f, 1.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_logistic, 0.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_bounded_relu, 6.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_bounded_relu, 0.1f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::jit}, - activation_test_params{eltwise_relu, 0.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_relu, 0.5f, 0.5f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_elu, 0.5f, 0.5f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_elu, 1.0f, 1.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_logistic, 0.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_bounded_relu, 6.0f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - activation_test_params{eltwise_bounded_relu, 0.1f, 0.0f, {4, 3, 228, 228}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}} - // TODO: rewrite to ngraph to have reshape functionality - // activation_test_params{eltwise_relu, 0.5f, 0.5f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - // activation_test_params{eltwise_elu, 0.5f, 0.5f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - // activation_test_params{eltwise_elu, 1.0f, 1.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - // activation_test_params{eltwise_logistic, 0.0f, 0.0f, {1, 32, 128, 256}, 3, 
MKLDNNPlugin::impl_desc_type::jit}, - // activation_test_params{eltwise_bounded_relu, 6.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - // activation_test_params{eltwise_bounded_relu, 0.1f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::jit}, - // activation_test_params{eltwise_relu, 0.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // activation_test_params{eltwise_relu, 0.5f, 0.5f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // activation_test_params{eltwise_elu, 0.5f, 0.5f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // activation_test_params{eltwise_elu, 1.0f, 1.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // activation_test_params{eltwise_logistic, 0.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // activation_test_params{eltwise_bounded_relu, 6.0f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // activation_test_params{eltwise_bounded_relu, 0.1f, 0.0f, {1, 32, 128, 256}, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp deleted file mode 100644 index c36beb6e6eb..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include -#include -#include - -using namespace ::testing; -using namespace mkldnn; - -struct batchnorm_scaleshift_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in; - - // BatchNorm specific param - double epsilon; - // ScaleShift specific param - int broadcast; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - std::vector preferTypes; - - std::vector> comp; -}; - -template -void ref_batchnorm4DWithScale(const InferenceEngine::TBlob &src, const data_t *variance, const data_t *mean, const data_t *scaleShift, - InferenceEngine::TBlob &dst, double eps) { - size_t MB = src.getTensorDesc().getDims()[0]; - size_t IC = src.getTensorDesc().getDims()[1]; - size_t IH = src.getTensorDesc().getDims()[2]; - size_t IW = src.getTensorDesc().getDims()[3]; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - const data_t *scale_data = scaleShift; - const data_t *shift_data = scaleShift + IC; - - for (int c = 0; c < IC; ++c) { - data_t v_mean = mean[c]; - data_t v_variance = variance[c]; - data_t sqrt_variance = 0; - data_t scale = scale_data[c]; - data_t shift = shift_data[c]; - - sqrt_variance = 1. 
/ sqrt(v_variance + eps); - - for (int n = 0; n < MB; ++n) - for (int h = 0; h < IH; ++h) - for (int w = 0; w < IW; ++w) { - size_t idx = n * IC * IH * IW - + c * IH * IW - + h * IW + w; - // BatchNorm - dst_data[idx] = (src_data[idx] - v_mean) * sqrt_variance; - // ScaleShift - dst_data[idx] = dst_data[idx] * scale + shift; - } - } -} - -class MKLDNNGraphBatchNormScaleShiftTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - -)V0G0N"; - -protected: - virtual void TearDown() { - } - - std::string getModel(batchnorm_scaleshift_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - REPLACE_WITH_NUM(model, "_EPSILON_", p.epsilon); - REPLACE_WITH_NUM(model, "_BROADCAST_", p.broadcast); - - size_t w_data_size = p.in.c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", 2*w_data_size); - REPLACE_WITH_NUM(model, "_S3_", 3*w_data_size); - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - - return model; - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - batchnorm_scaleshift_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {p.in.c * 4 * sizeof(float)}, InferenceEngine::C }); - weights->allocate(); - fill_data( weights->data().as(), weights->size() / sizeof(float)); - float * data = weights->buffer(); - for (size_t i = 0; i < weights->size() / sizeof(float); i++) { - if (data[i] < 0) { - data[i] *= -1; - } - } - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if ((nodes[i]->getType() == MKLDNNPlugin::Depthwise && nodes[i]->getCnnLayer()->type == "ScaleShift") - || nodes[i]->getType() == MKLDNNPlugin::BatchNormalization) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_TRUE(nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() | p.selectedType); - } - } - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - 
srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_batchnorm4DWithScale(*srcPtr, (const float*) weights->buffer(), ((const float*) weights->buffer() + p.in.c), (const float*) weights->buffer() + p.in.c*2, dst_ref, p.epsilon); - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphBatchNormScaleShiftTests, TestsBatchNormWithScaleShift) {} - -using namespace MKLDNNPlugin; - -const size_t expect_num_impl = InferenceEngine::with_cpu_x86_avx2() ? 3 : 2; - -INSTANTIATE_TEST_CASE_P( - TestsBatchNormWithScaleShift, MKLDNNGraphBatchNormScaleShiftTests, - ::testing::Values( - batchnorm_scaleshift_test_params{{1, 32, 128, 256}, 1e-6, 2, expect_num_impl, jit}, - batchnorm_scaleshift_test_params{{4, 3, 227, 227}, 1e-6, 2, expect_num_impl, jit}, - batchnorm_scaleshift_test_params{{1, 32, 128, 256}, 1e-6, 2, expect_num_impl, ref, {ref_any}}, - batchnorm_scaleshift_test_params{{4, 3, 227, 227}, 1e-6, 2, expect_num_impl, ref, {ref_any}})); - - -class MKLDNNGraphDynBatchBatchNormScaleShiftTests: public MKLDNNGraphBatchNormScaleShiftTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - batchnorm_scaleshift_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.in.n; - if (MB < 2) - MB = 2; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {p.in.c * 4 * sizeof(float)}, InferenceEngine::C }); - weights->allocate(); - fill_data( weights->data().as(), weights->size() / sizeof(float)); - float * data = weights->buffer(); - for (size_t i = 0; i < weights->size() / sizeof(float); i++) { - if (data[i] < 0) { - data[i] *= -1; - } - } - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {MB, p.in.c, p.in.h, p.in.w}; - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr 
output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkScaleShift = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return (node->getType() == MKLDNNPlugin::Depthwise && node->getCnnLayer()->type == "ScaleShift") - || node->getType() == MKLDNNPlugin::BatchNormalization; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkScaleShift); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkScaleShift); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchBatchNormScaleShiftTests, TestsDynBatchBatchNormWithScaleShift) {} - -INSTANTIATE_TEST_CASE_P( - TestsDynBatchBatchNormWithScaleShift, MKLDNNGraphDynBatchBatchNormScaleShiftTests, - ::testing::Values( - // TODO: rewrite to ngraph to have reshape functionality - // batchnorm_scaleshift_test_params{{1, 32, 128, 256}, 1e-6, 2, 5, MKLDNNPlugin::impl_desc_type::jit}, - // batchnorm_scaleshift_test_params{{1, 32, 128, 256}, 1e-6, 2, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // batchnorm_scaleshift_test_params{{4, 3, 227, 227}, 1e-6, 2, 5, MKLDNNPlugin::impl_desc_type::jit}, - batchnorm_scaleshift_test_params{{4, 3, 227, 227}, 1e-6, 2, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp deleted file mode 100644 index 279218e57ed..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp +++ /dev/null @@ -1,312 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include -#include -#include "tests_common.hpp" -#include "ie_system_conf.h" - -using namespace ::testing; -using namespace MKLDNNPlugin; -using namespace mkldnn; - -struct batchnorm4D_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in; - - double epsilon; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - std::vector preferTypes; - - std::vector> comp; -}; - -template -void ref_batchnorm4D(const InferenceEngine::TBlob &src, const data_t *variance, const data_t *mean, - InferenceEngine::TBlob &dst, batchnorm4D_test_params prm) { - size_t MB = src.getTensorDesc().getDims()[0]; - size_t IC = src.getTensorDesc().getDims()[1]; - size_t IH = src.getTensorDesc().getDims()[2]; - size_t IW = src.getTensorDesc().getDims()[3]; - - const double eps = prm.epsilon; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (int c = 0; c < IC; ++c) { - data_t v_mean = mean[c]; - data_t v_variance = variance[c]; - data_t sqrt_variance = 0; - - sqrt_variance = 1. 
/ sqrt(v_variance + eps); - - for (int n = 0; n < MB; ++n) - for (int h = 0; h < IH; ++h) - for (int w = 0; w < IW; ++w) { - size_t idx = n * IC * IH * IW - + c * IH * IW - + h * IW + w; - dst_data[idx] = (src_data[idx] - v_mean) * sqrt_variance; - } - } -} - -class MKLDNNGraphBatchNormTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _OC_ - _OH_ - _OW_ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(batchnorm4D_test_params p) { - std::string model = model_t; - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - REPLACE_WITH_NUM(model, "_EPSILON_", p.epsilon); - - REPLACE_WITH_NUM(model, "_OW_", p.in.w); - REPLACE_WITH_NUM(model, "_OH_", p.in.h); - REPLACE_WITH_NUM(model, "_OC_", p.in.c); - - size_t w_data_size = p.in.c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - batchnorm4D_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {p.in.c * 2 * sizeof(float)}, InferenceEngine::C }); - weights->allocate(); - fill_data(weights->buffer(), weights->size() / sizeof(float)); - float * data = weights->buffer(); - for (size_t i = 0; i < weights->size() / sizeof(float); i++) { - if (data[i] < 0) { - data[i] *= -1; - } - } - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::BatchNormalization) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_TRUE(nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() | p.selectedType); - } - } - ASSERT_GE(5, nodes.size()); - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - 
output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_batchnorm4D(*srcPtr, (const float*) weights->buffer(), ((const float*) weights->buffer() + p.in.c), dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphBatchNormTests, TestsBatchNorm) {} - -const size_t expect_num_impl = InferenceEngine::with_cpu_x86_avx2() ? 3 : 2; - -INSTANTIATE_TEST_CASE_P( - TestsBatchNorm, MKLDNNGraphBatchNormTests, - ::testing::Values( - batchnorm4D_test_params{{1, 32, 128, 256}, 1e-6, expect_num_impl, jit}, - batchnorm4D_test_params{{3, 3, 128, 256}, 1e-6, expect_num_impl, jit}, - batchnorm4D_test_params{{1, 32, 128, 256}, 1e-6, expect_num_impl, ref, {ref_any}}, - batchnorm4D_test_params{{3, 3, 128, 256}, 1e-6, expect_num_impl, ref, {ref_any}})); - -class MKLDNNGraphDynBatchBatchNormTests: public MKLDNNGraphBatchNormTests { -protected: - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - batchnorm4D_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.in.n; - if (MB < 2) - MB = 2; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {p.in.c * 4 * sizeof(float)}, InferenceEngine::C }); - weights->allocate(); - fill_data( weights->data().as(), weights->size() / sizeof(float)); - float * data = weights->buffer(); - for (size_t i = 0; i < weights->size() / sizeof(float); i++) { - if (data[i] < 0) { - data[i] *= -1; - } - } - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {MB, p.in.c, p.in.h, p.in.w}; - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - auto* srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkScaleShift = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::BatchNormalization; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkScaleShift); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkScaleShift); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - 
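// --- Editor's note (not part of the original patch) ---------------------------------
// A minimal, self-contained sketch of the per-channel normalization that the deleted
// ref_batchnorm4D reference above computes for an NCHW tensor:
//     dst[n][c][h][w] = (src[n][c][h][w] - mean[c]) / sqrt(variance[c] + eps)
// The function name and the flat std::vector layout below are illustrative assumptions,
// not Inference Engine / MKLDNN plugin APIs.
#include <cmath>
#include <cstddef>
#include <vector>

static void batchnorm_nchw_sketch(const std::vector<float>& src,
                                  const std::vector<float>& mean,
                                  const std::vector<float>& variance,
                                  std::vector<float>& dst,
                                  std::size_t N, std::size_t C, std::size_t H, std::size_t W,
                                  double eps) {
    for (std::size_t c = 0; c < C; ++c) {
        // One inverse standard deviation per channel, matching the reference loop order.
        const float inv_std = static_cast<float>(1.0 / std::sqrt(variance[c] + eps));
        for (std::size_t n = 0; n < N; ++n)
            for (std::size_t h = 0; h < H; ++h)
                for (std::size_t w = 0; w < W; ++w) {
                    const std::size_t idx = ((n * C + c) * H + h) * W + w;
                    dst[idx] = (src[idx] - mean[c]) * inv_std;
                }
    }
}
// -------------------------------------------------------------------------------------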
-TEST_P(MKLDNNGraphDynBatchBatchNormTests, TestsDynBatchBatchNorm) {} - -INSTANTIATE_TEST_CASE_P( - TestsDynBatchBatchNorm, MKLDNNGraphDynBatchBatchNormTests, - ::testing::Values( - // TODO: rewrite to ngraph to have reshape functionality - // batchnorm4D_test_params{{1, 32, 128, 256}, 1e-6, 5, MKLDNNPlugin::impl_desc_type::jit}, - // batchnorm4D_test_params{{1, 32, 128, 256}, 1e-6, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // batchnorm4D_test_params{{3, 3, 128, 256}, 1e-6, 5, MKLDNNPlugin::impl_desc_type::jit}, - batchnorm4D_test_params{{3, 3, 128, 256}, 1e-6, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp deleted file mode 100644 index 5ff03d84f86..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp +++ /dev/null @@ -1,1030 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include -#include -#include -#include -#include "tests_common.hpp" - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct concat_test_params { - // Formats: NCHW, NCDHW - vector in1; - vector in2; - - size_t axis; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - -class MKLDNNGraphConcatTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - __SRC_DIMS_1__ - - - - - - __SRC_DIMS_2__ - - - - - - - __SRC_DIMS_1__ - - __SRC_DIMS_2__ - - - - __DST_DIMS__ - - - - - - - - - -)V0G0N"; - - std::string getModel(concat_test_params p) { - std::string model = model_t; - std::string s_dims; - for (auto& dim : p.in1) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS_1__", s_dims); - - s_dims = ""; - for (auto& dim : p.in2) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS_2__", s_dims); - - s_dims = ""; - for (size_t i = 0; i < p.in1.size(); i++) { - size_t dim = p.axis == i ? 
p.in1[i] + p.in2[i] : p.in1[i]; - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__DST_DIMS__", s_dims); - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - concat_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Concatenation) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType()); - } - } - ASSERT_LE(3, nodes.size()); - - InferenceEngine::SizeVector dims_src1 = p.in1; - InferenceEngine::SizeVector dims_src2 = p.in2; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.in1.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - case 6: - layout = InferenceEngine::BLOCKED; - break; - } - - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, layout}); - src1->allocate(); - - fill_data(src1->buffer(), src1->size()); - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, layout}); - src2->allocate(); - fill_data(src2->buffer(), src2->size()); - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - // Compare - float *src1_ptr = src1->buffer(); - size_t src1_size = src1->size(); - float *src2_ptr = src2->buffer(); - size_t src2_size = src2->size(); - float *dst_ptr = output->buffer(); - size_t dst_size = output->size(); - - int len1 = 1, len2 = 1, cycles; - for (int dim = p.axis; dim < output->getTensorDesc().getDims().size(); dim++) { - len1 *= src1->getTensorDesc().getDims()[dim]; - len2 *= src2->getTensorDesc().getDims()[dim]; - } - cycles = p.axis; - - - int index1 = 0, index2 = 0, index = 0; - for (int cycle = 0; cycle < cycles; cycle ++) { - for (int i1 = 0; i1 < len1; i1++) { - if (src1_ptr[index1] != dst_ptr[index]) - { - FAIL() << "index: " << index << " src: " << src1_ptr[index1] << ", dst: " << dst_ptr[index]; - } - index1++; index++; - } - for (int i2 = 0; i2 < len2; i2++) { - if (src2_ptr[index2] != dst_ptr[index]) - { - FAIL() << "index: " << index << " src: " << src2_ptr[index2] << ", dst: " << dst_ptr[index]; - } - index2++; index++; - } - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphConcatTests, TestsConcat) {} - -INSTANTIATE_TEST_CASE_P( - 
TestsConcat, MKLDNNGraphConcatTests, - ::testing::Values( - concat_test_params { - {1, 3, 3, 5}, - {1, 3, 3, 5}, - 1, 2 - }, - concat_test_params { - {1, 7, 1, 5}, - {1, 7, 9, 5}, - 2, 1, MKLDNNPlugin::impl_desc_type::ref - }, - concat_test_params { - {1, 2, 3, 5, 3}, - {1, 5, 3, 5, 3}, - 1, 2 - }, - concat_test_params { - {1, 32, 3, 4, 5}, - {1, 32, 3, 4, 5}, - 1, 6, MKLDNNPlugin::impl_desc_type::unknown - }, - concat_test_params { - {1, 64, 16, 16, 16, 1}, - {1, 64, 16, 16, 16, 1}, - 5, 1, MKLDNNPlugin::impl_desc_type::ref - })); - -class MKLDNNGraphDynBatchConcatTests: public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - 1__SRC_DIMS_1__ - - - - - - - 1__SRC_DIMS_2__ - - - - - - - - 1__SRC_DIMS_1__ - - - 1__SRC_DIMS_2__ - - - - - 1__DST_DIMS__ - - - - - - - - - -)V0G0N"; - - std::string getModel(concat_test_params p) { - std::string model = model_t; - std::string s_dims; - for (size_t i = 1; i < p.in1.size(); i++) { - s_dims += "\n "; - s_dims += std::to_string(p.in1[i]) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS_1__", s_dims); - - s_dims = ""; - for (size_t i = 1; i < p.in2.size(); i++) { - s_dims += "\n "; - s_dims += std::to_string(p.in2[i]) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS_2__", s_dims); - - s_dims = ""; - for (size_t i = 1; i < p.in1.size(); i++) { - size_t dim = p.axis == i ? p.in1[i] + p.in2[i] : p.in1[i]; - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__DST_DIMS__", s_dims); - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - concat_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.in1[0]; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src1 = p.in1; - InferenceEngine::SizeVector dims_src2 = p.in2; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.in1.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - case 6: - layout = InferenceEngine::BLOCKED; - break; - } - - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, layout}); - src1->allocate(); - - fill_data(src1->buffer(), src1->size()); - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, layout}); - src2->allocate(); - fill_data(src2->buffer(), src2->size()); - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = 
InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - - auto checkConcat = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Concatenation; - }; - - MKLDNNGraphTestClass::CheckDynBatchType checkType = MKLDNNGraphTestClass::CheckDynBatchType::Both; - if (p.selectedType == MKLDNNPlugin::impl_desc_type::unknown) - checkType = MKLDNNGraphTestClass::CheckDynBatchType::Child; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkConcat, checkType); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkConcat, checkType); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchConcatTests, TestsDynBatchConcat) {} - - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsDynBatchConcat, MKLDNNGraphDynBatchConcatTests, - ::testing::Values( - concat_test_params { - {1, 7, 2, 5}, - {1, 7, 2, 5}, - 2, 1, MKLDNNPlugin::impl_desc_type::ref - }, - concat_test_params { - {1, 7, 2, 5}, - {1, 13, 2, 5}, - 1, 2, MKLDNNPlugin::impl_desc_type::unknown - }, - concat_test_params { - {1, 7, 2, 13}, - {1, 7, 2, 17}, - 3, 1, MKLDNNPlugin::impl_desc_type::ref - }, - concat_test_params { - {1, 8, 8, 16}, - {1, 16, 8, 16}, - 1, 4, MKLDNNPlugin::impl_desc_type::unknown - }, - concat_test_params { - {3, 7, 2, 5}, - {3, 13, 2, 5}, - 1, 2, MKLDNNPlugin::impl_desc_type::unknown - }, - concat_test_params { - {2, 2, 3, 3}, - {2, 3, 3, 3}, - 1, 2, MKLDNNPlugin::impl_desc_type::unknown - }, - concat_test_params { - {2, 2, 3, 3, 3}, - {2, 3, 3, 3, 3}, - 1, 2, MKLDNNPlugin::impl_desc_type::unknown - })); - -struct concat_param { - std::string name; - size_t axis; - size_t input1; - size_t input2; -}; - -struct two_concat_test_params { - // Formats: NCHW, NCDHW - vector in1; - vector in2; - vector in3; - - concat_param concat1; - concat_param concat2; -}; - -class MKLDNNGraphTwoConcatTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - __SRC_DIMS_1__ - - - - - - __SRC_DIMS_2__ - - - - - - __SRC_DIMS_3__ - - - - - - - - _CI41N_ - _CI41C_ - _CI41D_ - _CI41H_ - _CI41W_ - - - _CI42N_ - _CI42C_ - _CI42D_ - _CI42H_ - _CI42W_ - - - - __CO_DIMS_1__ - - - - - - - - _CI51N_ - _CI51C_ - _CI51D_ - _CI51H_ - _CI51W_ - - - _CI52N_ - _CI52C_ - _CI52D_ - _CI52H_ - _CI52W_ - - - - __CO_DIMS_2__ - - - - - - - - - - - -)V0G0N"; - void changeEdgeToLayer(std::string& model, int f_l, int f_p, int t_l, int t_p, vector dims) { - std::string TL = "_FL" + std::to_string(f_l) + std::to_string(f_p) + "_"; - std::string TP = "_FP" + std::to_string(f_l) + std::to_string(f_p) + "_"; - if (!FIND_STR(model, TL) || !FIND_STR(model, TP)) { - if (!FIND_STR(model, "_FSL_") || !FIND_STR(model, "_FSP_") || - !FIND_STR(model, "_FSLTL_") || !FIND_STR(model, "_FSLTP_")) { - IE_THROW() << "Incorrect configuration!"; - } - REPLACE_WITH_NUM(model, "_FSL_", f_l); - REPLACE_WITH_NUM(model, "_FSP_", f_p); - REPLACE_WITH_NUM(model, "_FSLTL_", t_l); - REPLACE_WITH_NUM(model, "_FSLTP_", t_p); - } else { - REPLACE_WITH_NUM(model, TL, t_l); - REPLACE_WITH_NUM(model, TP, t_p); - } - - std::string CI = "_CI" + std::to_string(t_l) + std::to_string(t_p); - auto dims_size = dims.size(); - REPLACE_WITH_NUM(model, CI + "N_", dims[0]); - REPLACE_WITH_NUM(model, CI + "C_", dims[1]); - REPLACE_WITH_NUM(model, CI + "H_", dims[dims_size - 2]); - REPLACE_WITH_NUM(model, CI + "W_", dims[dims_size - 1]); - if (dims_size < 5) 
REMOVE_LINE(model, std::string("") + CI + std::string("D_") + ""); - else REPLACE_WITH_NUM(model, CI + "D_", dims[dims_size - 3]); - } - - - std::string getModel(two_concat_test_params p) { - std::string model = model_t; - std::string s_dims; - for (size_t i = 0; i < p.in1.size(); i++) { - s_dims += "\n "; - s_dims += std::to_string(p.in1[i]) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS_1__", s_dims); - - s_dims = ""; - for (size_t i = 0; i < p.in2.size(); i++) { - s_dims += "\n "; - s_dims += std::to_string(p.in2[i]) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS_2__", s_dims); - - s_dims = ""; - for (size_t i = 0; i < p.in3.size(); i++) { - s_dims += "\n "; - s_dims += std::to_string(p.in3[i]) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS_3__", s_dims); - - vector concat11; - switch (p.concat1.input1) { - case 1: - changeEdgeToLayer(model, 2, 1, 4, 1, p.in2); - concat11 = p.in2; - break; - case 2: - changeEdgeToLayer(model, 3, 1, 4, 1, p.in3); - concat11 = p.in3; - break; - default: - changeEdgeToLayer(model, 1, 1, 4, 1, p.in1); - concat11 = p.in1; - } - - vector concat12; - switch (p.concat1.input2) { - case 1: - changeEdgeToLayer(model, 2, 1, 4, 2, p.in2); - concat12 = p.in2; - break; - case 2: - changeEdgeToLayer(model, 3, 1, 4, 2, p.in3); - concat12 = p.in3; - break; - default: - changeEdgeToLayer(model, 1, 1, 4, 2, p.in1); - concat12 = p.in1; - } - - vector concat21; - switch (p.concat2.input1) { - case 1: - changeEdgeToLayer(model, 2, 1, 5, 1, p.in2); - concat21 = p.in2; - break; - case 2: - changeEdgeToLayer(model, 3, 1, 5, 1, p.in3); - concat21 = p.in3; - break; - default: - changeEdgeToLayer(model, 1, 1, 5, 1, p.in1); - concat21 = p.in1; - } - - vector concat22; - switch (p.concat2.input2) { - case 1: - changeEdgeToLayer(model, 2, 1, 5, 2, p.in2); - concat22 = p.in2; - break; - case 2: - changeEdgeToLayer(model, 3, 1, 5, 2, p.in3); - concat22 = p.in3; - break; - default: - changeEdgeToLayer(model, 1, 1, 5, 2, p.in1); - concat22 = p.in1; - } - - s_dims = ""; - for (size_t i = 0; i < p.in2.size(); i++) { - size_t concat = p.concat1.axis == i ? concat11[i] + concat12[i] : concat21[i]; - s_dims += "\n "; - s_dims += std::to_string(concat) + ""; - } - REPLACE_WITH_STR(model, "__CO_DIMS_1__", s_dims); - - REPLACE_WITH_NUM(model, "_CONCAT1_AXIS_", p.concat1.axis); - REPLACE_WITH_STR(model, "_CONCAT1_NAME_", p.concat1.name); - - s_dims = ""; - for (size_t i = 0; i < p.in2.size(); i++) { - size_t concat = p.concat2.axis == i ? 
concat21[i] + concat22[i] : concat21[i]; - s_dims += "\n "; - s_dims += std::to_string(concat) + ""; - } - REPLACE_WITH_STR(model, "__CO_DIMS_2__", s_dims); - - REPLACE_WITH_NUM(model, "_CONCAT2_AXIS_", p.concat2.axis); - REPLACE_WITH_STR(model, "_CONCAT2_NAME_", p.concat2.name); - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - two_concat_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src1 = p.in1; - InferenceEngine::SizeVector dims_src2 = p.in2; - InferenceEngine::SizeVector dims_src3 = p.in3; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.in1.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, layout}); - src1->allocate(); - fill_data(src1->buffer(), src1->size()); - - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, layout}); - src2->allocate(); - fill_data(src2->buffer(), src2->size()); - - InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src3, layout}); - src3->allocate(); - fill_data(src3->buffer(), src3->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - srcs.insert(std::pair("in3", src3)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - for (auto & it : out) { - std::pair item = it; - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - } - - graph.Infer(srcs, outputBlobs); - - for (const auto& concat : {p.concat1, p.concat2}) { - float *src1_ptr; - size_t src1_size; - float *src2_ptr; - size_t src2_size; - InferenceEngine::Blob::Ptr src1_c; - InferenceEngine::Blob::Ptr src2_c; - - switch (concat.input1) { - case 1: - src1_ptr = src2->buffer(); - src1_size = src2->size(); - src1_c = src2; - break; - case 2: - src1_ptr = src3->buffer(); - src1_size = src3->size(); - src1_c = src3; - break; - default: - src1_ptr = src1->buffer(); - src1_size = src1->size(); - src1_c = src1; - } - - switch (concat.input2) { - case 1: - src2_ptr = src2->buffer(); - src2_size = src2->size(); - src2_c = src2; - break; - case 2: - src2_ptr = src3->buffer(); - src2_size = src3->size(); - src2_c = src3; - break; - default: - src2_ptr = src1->buffer(); - src2_size = src1->size(); - src2_c = src1; - } - - float *dst_ptr = outputBlobs[concat.name]->buffer(); - size_t dst_size = outputBlobs[concat.name]->size(); - - int len1 = 1, len2 = 1, cycles; - for (int dim = concat.axis; dim < outputBlobs[concat.name]->getTensorDesc().getDims().size(); dim++) { - len1 *= src1_c->getTensorDesc().getDims()[dim]; - len2 *= src2_c->getTensorDesc().getDims()[dim]; - } - cycles = concat.axis; - - int index1 = 0, index2 = 0, index = 0; - for (int cycle = 0; cycle < cycles; cycle ++) { - for (int i1 = 0; i1 < len1; i1++) { - if (src1_ptr[index1] != dst_ptr[index]) - { - 
FAIL() << concat.name << " index: " << index << " src: " - << src1_ptr[index1] << ", dst: " << dst_ptr[index]; - } - index1++; index++; - } - for (int i2 = 0; i2 < len2; i2++) { - if (src2_ptr[index2] != dst_ptr[index]) - { - FAIL() << concat.name << " index: " << index << " src: " - << src2_ptr[index2] << ", dst: " << dst_ptr[index]; - } - index2++; index++; - } - } - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphTwoConcatTests, TestsTwoConcat) {} - -INSTANTIATE_TEST_CASE_P( - TestsTwoConcat, MKLDNNGraphTwoConcatTests, - ::testing::Values( - two_concat_test_params { - {1, 5, 2, 5}, - {3, 5, 2, 5}, - {1, 5, 2, 5}, - {"concat1", 0, 0, 1}, - {"concat2", 0, 1, 2} - }, - two_concat_test_params { - {1, 2, 2, 5}, - {1, 5, 2, 5}, - {3, 5, 2, 5}, - {"concat1", 1, 0, 1}, - {"concat2", 0, 1, 2} - }, - two_concat_test_params { - {1, 2, 2, 2}, - {1, 1, 2, 2}, - {1, 3, 2, 2}, - {"concat1", 1, 0, 1}, - {"concat2", 1, 1, 2} - }, - two_concat_test_params { - {1, 5, 2, 5}, - {3, 5, 2, 5}, - {1, 5, 2, 5}, - {"concat1", 0, 0, 1}, - {"concat2", 0, 2, 1} - }, - two_concat_test_params { - {1, 2, 2, 5}, - {1, 5, 2, 5}, - {3, 5, 2, 5}, - {"concat1", 1, 0, 1}, - {"concat2", 0, 2, 1} - }, - two_concat_test_params { - {1, 2, 2, 2}, - {1, 1, 2, 2}, - {1, 3, 2, 2}, - {"concat1", 1, 0, 1}, - {"concat2", 1, 2, 1} - })); - - -class MKLDNNGraphTwoInputInConcatTests: public TestsCommon { - std::string model_t = R"V0G0N( - - - - - - 1 - 3 - 2 - 2 - - - - - - - 1 - 2 - 2 - 2 - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 2 - 2 - 2 - - - 1 - 3 - 2 - 2 - - - - - 1 - 5 - 2 - 2 - - - - - - - - - - - -)V0G0N"; - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - std::string model = model_t; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src1 = {1, 3, 2, 2}; - InferenceEngine::SizeVector dims_src2 = {1, 2, 2, 2}; - - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::NCHW}); - src1->allocate(); - float *src1_data = src1->buffer(); - for (size_t i = 0; i < src1->size(); i++) { - src1_data[i] = i + 1; - } - - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::NCHW}); - src2->allocate(); - fill_data(src2->buffer(), src2->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - for (auto & it : out) { - std::pair item = it; - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - } - - graph.Infer(srcs, outputBlobs); - - float *src1_ptr = src2->buffer(); - float *src2_ptr = src1->buffer(); - - float *dst_ptr = outputBlobs["o_concat"]->buffer(); - - int len1 = 1, len2 = 1, cycles; - for (int dim = 1; dim < outputBlobs["o_concat"]->getTensorDesc().getDims().size(); dim++) { - len1 *= src2->getTensorDesc().getDims()[dim]; - len2 *= src1->getTensorDesc().getDims()[dim]; - } - cycles = 1; - - 
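// Editorial note on the check that follows: the first len1 = 2 * 2 * 2 values of the
// concat output are expected to come from the 2-channel input ("in2", read via src1_ptr),
// followed by len2 = 3 * 2 * 2 values from the 3-channel input ("in1"), i.e. the data of
// "in2" is expected to occupy the leading channels of the result.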
int index1 = 0, index2 = 0, index = 0; - for (int cycle = 0; cycle < cycles; cycle ++) { - for (int i1 = 0; i1 < len1; i1++) { - if (src1_ptr[index1] != dst_ptr[index]) - { - FAIL() << "concat index: " << index << " src: " - << src1_ptr[index1] << ", dst: " << dst_ptr[index]; - } - index1++; index++; - } - for (int i2 = 0; i2 < len2; i2++) { - if (src2_ptr[index2] != dst_ptr[index]) - { - FAIL() << "concat index: " << index << " src: " - << src2_ptr[index2] << ", dst: " << dst_ptr[index]; - } - index2++; index++; - } - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_F(MKLDNNGraphTwoInputInConcatTests, TestSecondInputToConcat) {} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp deleted file mode 100644 index 199a92641db..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp +++ /dev/null @@ -1,531 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include -#include "tests_common.hpp" -#include -#include - -using namespace InferenceEngine; -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct conv_test_params { - // Formats: NCHW, NCDHW - vector dims; - // Formats: WH, WHD - vector kernel; - vector strides; - vector pads_begin; - vector pads_end; - - size_t out_c; - size_t grp_c; - string auto_pad; - - size_t num_prim_desc; - - int selectedType; - bool defaultPrimitivesPriority; - vector preferTypes; - - vector> comp; -}; - -template -void ref_conv(const TBlob &src, const data_t *weights, const size_t weightsSize, - TBlob &dst, struct conv_test_params prm) { - SizeVector src_dims = src.getTensorDesc().getDims(); - auto dims_size = src_dims.size(); - - size_t KW = prm.kernel[X_AXIS]; - size_t KH = prm.kernel[Y_AXIS]; - size_t KD = dims_size == 5 ? prm.kernel[Z_AXIS] : 1u; - size_t GC = prm.grp_c; - - size_t IC = src_dims[1]; - size_t ID = dims_size == 5 ? src_dims[dims_size - 3] : 1u; - size_t IH = src_dims[dims_size - 2]; - size_t IW = src_dims[dims_size - 1]; - - size_t OW = (IW + prm.pads_end[X_AXIS] + prm.pads_begin[X_AXIS] - prm.kernel[X_AXIS]) / prm.strides[X_AXIS] + 1u; - size_t OH = (IH + prm.pads_end[Y_AXIS] + prm.pads_begin[Y_AXIS] - prm.kernel[Y_AXIS]) / prm.strides[Y_AXIS] + 1u; - size_t OD = dims_size == 5 ? 
(ID + 2u * prm.pads_begin[Z_AXIS] - prm.kernel[Z_AXIS]) / prm.strides[Z_AXIS] + 1u : 1u; - size_t OC = prm.out_c; - - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights; - const data_t *bias_data = weights_data + KW * KH * KD * OC * IC / GC; - data_t *dst_data = dst.data(); - - IE_ASSERT(KW * KH * KD * OC * IC / GC + OC == weightsSize); - SizeVector dst_dims = dst.getTensorDesc().getDims(); - auto dst_dims_size = dst_dims.size(); - IE_ASSERT(OW == dst_dims[dst_dims_size - 1]); - IE_ASSERT(OH == dst_dims[dst_dims_size - 2]); - - size_t SC1 = OH * OW; - size_t SC2 = SC1 * OD; - size_t SC3 = OC / GC; - size_t SC4 = SC2 * SC3; - - size_t IC1 = IH * IW; - size_t IC2 = IC1 * ID; - size_t IC3 = IC / GC; - size_t IC4 = IC2 * IC3; - - size_t KC1 = KH * KW; - size_t KC2 = KC1 * KD; - size_t KC3 = IC3 * KC2; - size_t KC4 = SC3 * KC3; - - for (uint32_t g = 0; g < GC; g++) { - size_t gc = g * SC4; - size_t goc = g * SC3; - size_t gic = g * IC4; - size_t gkc = g * KC4; - for (uint32_t oc = 0; oc < OC / GC; oc++) { - size_t cc = gc + oc * SC2; - size_t gooc = goc + oc; - size_t gkoc = gkc + oc * KC3; - for (uint32_t od = 0; od < OD; od++) { - size_t dc = cc + od * SC1; - for (uint32_t oh = 0; oh < OH; oh++) { - size_t hc = dc + oh * OW; - for (uint32_t ow = 0; ow < OW; ow++) { - size_t oidx = hc + ow; - - dst_data[oidx] = bias_data[gooc]; - - for (size_t ic = 0; ic < IC / GC; ic++) { - size_t icc = gkoc + ic * KC2; - size_t kicc = gic + ic * IC2; - for (size_t kd = 0; kd < KD; kd++) { - int32_t id = dims_size == 5 ? od * prm.strides[Z_AXIS] - prm.pads_begin[Z_AXIS] + kd : 0; - if (id < 0 || id >= (int32_t)ID) continue; - size_t kidc = kicc + id * IC1; - size_t kdc = icc + kd * KC1; - for (size_t kh = 0; kh < KH; kh++) { - int32_t ih = oh * prm.strides[Y_AXIS] - prm.pads_begin[Y_AXIS] + kh; - if (ih < 0 || ih >= (int32_t)IH) continue; - size_t kihc = kidc + ih * IW; - size_t khc = kdc + kh * KW; - for (size_t kw = 0; kw < KW; kw++) { - int32_t iw = ow * prm.strides[X_AXIS] - prm.pads_begin[X_AXIS] + kw; - if (iw < 0 || iw >= (int32_t)IW) continue; - - size_t iidx = kihc + iw; - size_t widx = khc + kw; - - dst_data[oidx] += src_data[iidx] * weights_data[widx]; - } - } - } - } - } - } - } - } - } -} - -class MKLDNNGraphConvolutionTests: public TestsCommon, - public WithParamInterface { - std::string model_t_5D = R"V0G0N( - - - - - __SRC_DIMS__ - - - - - - - - - - - __SRC_DIMS__ - - - - - _IN_ - _OC___DST_DIMS__ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(conv_test_params p) { - std::string model = model_t_5D; - std::string s_dims; - for (auto& dim : p.dims) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", s_dims); - - s_dims = ""; - int k_len = p.kernel.size(); - for (size_t i = 2; i < p.dims.size(); i++) { - size_t inx = k_len - i + 1; - size_t dim = (p.dims[i] + p.pads_end[inx] + p.pads_begin[inx] - p.kernel[inx]) / p.strides[inx] + 1lu; - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__DST_DIMS__", s_dims); - - REPLACE_WITH_NUM(model, "_IN_", p.dims[0]); - - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.kernel); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.strides); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.pads_end); - string auto_pad; - if (!p.auto_pad.empty()) auto_pad = string("auto_pad=") + string("\"") + p.auto_pad + string("\""); - REPLACE_WITH_STR(model, "_AP_", auto_pad); 
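// Editorial note: the __DST_DIMS__ values substituted earlier in this function follow the
// usual convolution output-size formula: out = (in + pad_begin + pad_end - kernel) / stride + 1.
// For example (taking a parameter set that appears below), dims {1, 9, 32, 16} with
// kernel {2, 4} (W, H), strides {1, 1}, pads_begin {1, 1}, pads_end {0, 2} gives
// H_out = (32 + 1 + 2 - 4) / 1 + 1 = 32 and W_out = (16 + 1 + 0 - 2) / 1 + 1 = 16.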
- - REPLACE_WITH_NUM(model, "_GC_", p.grp_c); - REPLACE_WITH_NUM(model, "_OC_", p.out_c); - - size_t w_data_size = 1; - for (auto ker : p.kernel) { - w_data_size *= ker; - } - - w_data_size = (w_data_size * p.out_c * p.dims[1] / p.grp_c) * sizeof(float); - size_t b_data_size = p.out_c * sizeof(float); - - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - - std::string primitivesPriorityStr; - if (!p.defaultPrimitivesPriority) { - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - primitivesPriorityStr = "PrimitivesPriority=\"" + impls + "\""; - } - REPLACE_WITH_STR(model, "_PRIM_PRIORITY_", primitivesPriorityStr); - - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - conv_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - size_t blob_size = p.out_c * p.dims[1] / p.grp_c; - for (auto k : p.kernel) { - blob_size *= k; - } - blob_size = (blob_size + p.out_c) * sizeof(float); - TBlob *weights = new TBlob - ({ Precision::U8, {blob_size}, C }); - weights->allocate(); - - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - - TBlob::Ptr weights_ptr = TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - bool isWino = false; - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Convolution) { - ASSERT_LE(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (const auto prim : node->getSupportedPrimitiveDescriptors()) { - if (p.defaultPrimitivesPriority) { - if (prim.getImplementationType() & MKLDNNPlugin::impl_desc_type::gemm) - FAIL() << "There should be no gemm implementation in supportedPrimitiveDescriptors"; - } - std::cout << MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(prim.getImplementationType()) << " "; - } - std::cout << std::endl; - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - if (InferenceEngine::with_cpu_x86_avx512f() && - InferenceEngine::with_cpu_x86_avx512_core() - && !p.preferTypes.empty() - && p.preferTypes[0] == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd) { - isWino = true; - ASSERT_EQ(p.preferTypes[0], node->getSelectedPrimitiveDescriptor()->getImplementationType()); - } else { - ASSERT_EQ(p.selectedType, - node->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - } - - Layout layout = ANY; - switch (p.dims.size()) { - case 4: - layout = NCHW; - break; - case 5: - layout = NCDHW; - break; - } - - Blob::Ptr src = make_shared_blob - ({ Precision::FP32, p.dims, layout }); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - OutputsDataMap out; - out = network.getOutputsInfo(); - BlobMap outputBlobs; - - std::pair item = *out.begin(); - - TBlob::Ptr output; - output = make_shared_blob(item.second->getTensorDesc()); - 
output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_conv(*srcPtr, (const float *)weights->buffer(), weights->size() / sizeof(float), dst_ref, p); - compare(*output, dst_ref, 0.0002f); - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphConvolutionTests, TestsConvolution) {} - - -INSTANTIATE_TEST_CASE_P( - TestConvolution, MKLDNNGraphConvolutionTests, - ::testing::Values( - /*0*/ conv_test_params{{1, 9, 16, 32}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "same_upper", 6, - MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_1x1, false }, - conv_test_params{{1, 9, 32, 16}, {2, 4}, {1, 1}, {1, 1}, {0, 2}, 17, 1, "", 4, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 9, 32, 16}, {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 4, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 3, 40, 40}, {3, 3}, {1, 2}, {0, 0}, {0, 0}, 20, 1, "", 4, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 1, 40, 40}, {3, 3}, {1, 2}, {0, 0}, {0, 0}, 20, 1, "", 4, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 1, 32, 16}, {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 4, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 9, 32, 16}, {2, 4}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 4, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 4, 54, 96}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, 64, 1, "", 3, MKLDNNPlugin::impl_desc_type::jit, false }, - // 5D - /*8*/ conv_test_params{{1, 3, 15, 20, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 24, 15, 20, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 32, 15, 20, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 3, 15, 25, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 24, 15, 25, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, false }, - /*13*/ conv_test_params{{1, 32, 15, 25, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 16, 30, 30, 10}, {5, 5, 5}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, 16, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, false }, - conv_test_params{{1, 16, 30, 30, 10}, {5, 5, 5}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, 16, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, false} )); - -#ifdef USE_MKL -INSTANTIATE_TEST_CASE_P( - MKLTestConvolution, MKLDNNGraphConvolutionTests, - ::testing::Values( - conv_test_params{{1, 9, 16, 32}, - {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 6, MKLDNNPlugin::impl_desc_type::gemm, false, - {MKLDNNPlugin::impl_desc_type::gemm_any, - MKLDNNPlugin::impl_desc_type::gemm_blas, - MKLDNNPlugin::impl_desc_type::gemm_avx512, - MKLDNNPlugin::impl_desc_type::gemm_avx2, - MKLDNNPlugin::impl_desc_type::gemm_sse42} }, - conv_test_params{{1, 5, 15, 20, 20}, - {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas, false, - {MKLDNNPlugin::impl_desc_type::gemm_blas} }, - conv_test_params{{1, 5, 15, 20, 20}, - {3, 3, 3}, {3, 2, 1}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas, false, - 
{MKLDNNPlugin::impl_desc_type::gemm_blas} }, - // conv_test_params{{1, 5, 15, 20, 20}, - // {3, 3, 3}, {1, 1, 1}, {2, 2, 2}, {1, 1, 1}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas, false, - // {MKLDNNPlugin::impl_desc_type::gemm_blas} }, - conv_test_params{{1, 16, 30, 30, 10}, - {5, 5, 5}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, 16, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas, false, - {MKLDNNPlugin::impl_desc_type::gemm_blas} }, - conv_test_params{{1, 4, 16, 16, 16}, - {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, 8, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas, false, - {MKLDNNPlugin::impl_desc_type::gemm_blas} } )); -#endif - -INSTANTIATE_TEST_CASE_P( - TestConvolutionDefaultPrimitivesPriority, MKLDNNGraphConvolutionTests, - ::testing::Values( - /*0*/ conv_test_params{{1, 9, 16, 32}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "same_upper", 6, - MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_1x1, true }, - conv_test_params{{1, 9, 32, 16}, {2, 4}, {1, 1}, {1, 1}, {0, 2}, 17, 1, "", 3, MKLDNNPlugin::impl_desc_type::jit, true }, - conv_test_params{{1, 9, 32, 16}, {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 3, MKLDNNPlugin::impl_desc_type::jit, true }, - conv_test_params{{1, 3, 40, 40}, {3, 3}, {1, 2}, {0, 0}, {0, 0}, 20, 1, "", 3, MKLDNNPlugin::impl_desc_type::jit, true }, - conv_test_params{{1, 1, 40, 40}, {3, 3}, {1, 2}, {0, 0}, {0, 0}, 20, 1, "", 3, MKLDNNPlugin::impl_desc_type::jit, true }, - conv_test_params{{1, 1, 32, 16}, {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 3, MKLDNNPlugin::impl_desc_type::jit, true }, - // 5D - /*6*/ conv_test_params{{1, 3, 15, 25, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, true }, - conv_test_params{{1, 24, 15, 25, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, true }, - conv_test_params{{1, 32, 15, 25, 20}, {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, true }, - conv_test_params{{1, 16, 30, 30, 10}, {5, 5, 5}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, 16, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit, true } )); - - -class MKLDNNGraphDynBatchConvolutionTests: public MKLDNNGraphConvolutionTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - conv_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - std::vector dims = p.dims; - if (dims[0] < 2) - dims[0] = 2; - - size_t blob_size = p.out_c * dims[1] / p.grp_c; - for (auto k : p.kernel) { - blob_size *= k; - } - blob_size = (blob_size + p.out_c) * sizeof(float); - TBlob *weights = new TBlob({ Precision::U8, {blob_size}, Layout::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - TBlob::Ptr weights_ptr = TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - ResponseDesc resp; - StatusCode sts = implNet->setBatchSizeReshape(dims[0], &resp); - ASSERT_EQ((int)StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - Layout layout = ANY; - switch (dims.size()) { - case 4: - layout = NCHW; - break; - case 5: - layout = NCDHW; - break; - } - - Blob::Ptr src = make_shared_blob({ Precision::FP32, dims, layout }); - src->allocate(); - fill_data(src->buffer(), 
src->size()); - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - OutputsDataMap out; - out = network.getOutputsInfo(); - BlobMap outputBlobs; - - std::pair item = *out.begin(); - - TBlob::Ptr output; - output = make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkConvolution = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Convolution; - }; - - graph.checkDynBatch(srcs, outputBlobs, dims[0], dims[0], checkConvolution, MKLDNNGraphTestClass::CheckDynBatchType::Child); - graph.checkDynBatch(srcs, outputBlobs, 1, dims[0], checkConvolution, MKLDNNGraphTestClass::CheckDynBatchType::Child); - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchConvolutionTests, TestsDynBatchConvolution) {} - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestDynBatchConvolution, MKLDNNGraphDynBatchConvolutionTests, - ::testing::Values( - conv_test_params{{1, 8, 16, 32}, - {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "same_upper", 7, MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_1x1, - false, {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd}}, - conv_test_params{{1, 9, 32, 16}, - {2, 4}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit, - false, {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd} }, - conv_test_params{{1, 9, 32, 16}, - {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit, - false, {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd} }, - conv_test_params{{1, 3, 40, 40}, - {3, 3}, {1, 2}, {0, 0}, {0, 0}, 20, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit, - false, {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd} }, - conv_test_params{{1, 1, 40, 40}, - {3, 3}, {1, 2}, {0, 0}, {0, 0}, 20, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit, - false, {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd} }, - conv_test_params{{1, 1, 32, 16}, - {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit, - false, {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd} }, - conv_test_params{{1, 9, 32, 16}, - {2, 4}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::ref_any, - false, {MKLDNNPlugin::impl_desc_type::ref_any} } )); - -#ifdef USE_MKL -INSTANTIATE_TEST_CASE_P( - MKLTestDynBatchConvolution, MKLDNNGraphDynBatchConvolutionTests, - ::testing::Values( - conv_test_params{{1, 9, 16, 32}, - {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 7, MKLDNNPlugin::impl_desc_type::gemm, false, - {MKLDNNPlugin::impl_desc_type::gemm_any, - MKLDNNPlugin::impl_desc_type::gemm_blas, - MKLDNNPlugin::impl_desc_type::gemm_avx512, - MKLDNNPlugin::impl_desc_type::gemm_avx2, - MKLDNNPlugin::impl_desc_type::gemm_sse42} - })); -#endif - diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp deleted file mode 100644 index c95f0df5c3b..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp +++ /dev/null @@ -1,555 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "ir_gen_helper.hpp" -#include 
"tests_common.hpp" - -#include -#include -#include - -using namespace InferenceEngine; -using namespace ::testing; -using namespace std; -using namespace mkldnn; -using namespace single_layer_tests; - - -struct deconv_test_params { - // Formats: NCHW, NCDHW - vector dims; - // Formats: WH, WHD - vector kernel; - vector strides; - vector pads_begin; - vector pads_end; - - size_t out_c; - size_t grp_c; - - bool with_bias; - string auto_pad; - - size_t num_prim_desc; - - std::vector selectedTypes; - std::vector preferTypes; - - std::vector> comp; -}; - -template -void ref_deconv(const InferenceEngine::TBlob &src, const InferenceEngine::Blob::Ptr &weights, const InferenceEngine::Blob::Ptr &bias, - InferenceEngine::TBlob &dst, struct deconv_test_params prm) { - auto dims_size = src.getTensorDesc().getDims().size(); - - size_t G = prm.grp_c; - size_t KW = prm.kernel[X_AXIS]; - size_t KH = prm.kernel[Y_AXIS]; - size_t KD = prm.kernel.size() > Z_AXIS ? prm.kernel[Z_AXIS] : 1u; - - size_t PW = prm.pads_begin[X_AXIS]; - size_t PH = prm.pads_begin[Y_AXIS]; - size_t PD = prm.pads_begin.size() > Z_AXIS ? prm.pads_begin[Z_AXIS] : 0u; - - size_t SW = prm.strides[X_AXIS]; - size_t SH = prm.strides[Y_AXIS]; - size_t SD = prm.strides.size() > Z_AXIS ? prm.strides[Z_AXIS] : 1u; - - size_t IW = src.getTensorDesc().getDims()[dims_size - 1]; - size_t IH = src.getTensorDesc().getDims()[dims_size - 2]; - size_t ID = dims_size == 5 ? src.getTensorDesc().getDims()[dims_size - 3] : 1u; - size_t IC = src.getTensorDesc().getDims()[1]; - size_t MB = src.getTensorDesc().getDims()[0]; - - size_t OC = prm.out_c; - - size_t OW = SW * (IW - 1lu) + KW - 2lu * PW; - size_t OH = SH * (IH - 1lu) + KH - 2lu * PH; - size_t OD = dims_size == 5 ? (SD * (ID - 1) + KD - 2 * PD) : 1u; - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights->buffer().as(); - const data_t *bias_data = bias->buffer().as(); - - data_t *dst_data = dst.data(); - - size_t CS1 = OH * OW; - size_t CS2 = CS1 * OD; - size_t CS3 = CS2 * OC; - - size_t CI1 = IH * IW; - size_t CI2 = CI1 * ID; - size_t CI3 = CI2 * IC; - - size_t OC_G = OC / G; - size_t IC_G = IC / G; - - size_t CK1 = KH * KW; - size_t CK2 = CK1 * KD; - size_t CK3 = CK2 * OC_G; - size_t CK4 = CK3 * IC_G; - - for (size_t g = 0lu; g < G; ++g) { - size_t g_OC_G = g * OC_G; - size_t g_IC_G = g * IC_G; - size_t g_CK4 = g * CK4; - for (size_t mb = 0lu; mb < MB; ++mb) { - size_t mb_CS3 = mb * CS3; - size_t mb_CI3 = mb * CI3; - for (size_t oc = 0lu; oc < OC_G; ++oc) { - size_t g_OC_G_oc = g_OC_G + oc; - size_t mb_CS3_g_OC_G_oc_CS2 = mb_CS3 + g_OC_G_oc * CS2; - size_t g_CK4_oc_CK2 = g_CK4 + oc * CK2; - for (size_t od = 0lu; od < OD; ++od) { - size_t mb_CS3_g_OC_G_oc_CS2_od_CS1 = mb_CS3_g_OC_G_oc_CS2 + od * CS1; - size_t od_PD = od + PD; - for (size_t oh = 0lu; oh < OH; ++oh) { - size_t mb_CS3_g_OC_G_oc_CS2_od_CS1_oh_OW = mb_CS3_g_OC_G_oc_CS2_od_CS1 + oh * OW; - size_t oh_PH = oh + PH; - for (size_t ow = 0lu; ow < OW; ++ow) { - size_t didx = mb_CS3_g_OC_G_oc_CS2_od_CS1_oh_OW + ow; - size_t ow_PW = ow + PW; - - dst_data[didx] = data_t(0); - if (prm.with_bias) dst_data[didx] += bias_data[g_OC_G_oc]; - - for (size_t ic = 0lu; ic < IC_G; ic++) { - size_t mb_CI3_g_IC_G_ic_CI2 = mb_CI3 + (g_IC_G + ic) * CI2; - size_t g_CK4_oc_CK2_ic_CK3 = g_CK4_oc_CK2 + ic * CK3; - for (int kd = 0lu; kd < KD; kd++) { - if (od_PD < kd) continue; - size_t id = od_PD - kd; - if (id % SD != 0) continue; - id /= SD; - if (id >= ID) continue; - size_t mb_CI3_g_IC_G_ic_CI2_id_CI1 = mb_CI3_g_IC_G_ic_CI2 + id * 
CI1; - size_t g_CK4_oc_CK2_ic_CK3_kd_CK1 = g_CK4_oc_CK2_ic_CK3 + kd * CK1; - for (size_t kh = 0lu; kh < KH; kh++) { - if (oh_PH < kh) continue; - size_t ih = oh_PH - kh; - if (ih % SH != 0) continue; - ih /= SH; - if (ih >= IH) continue; - size_t mb_CI3_g_IC_G_ic_CI2_id_CI1_ih_IW = mb_CI3_g_IC_G_ic_CI2_id_CI1 + ih * IW; - size_t g_CK4_oc_CK2_ic_CK3_kd_CK1_kh_KW = g_CK4_oc_CK2_ic_CK3_kd_CK1 + kh * KW; - for (size_t kw = 0lu; kw < KW; kw++) { - if (ow_PW < kw) continue; - size_t iw = ow_PW - kw; - if (iw % SW != 0) continue; - iw /= SW; - if (iw >= IW) continue; - - size_t sidx = mb_CI3_g_IC_G_ic_CI2_id_CI1_ih_IW + iw; - - size_t widx = g_CK4_oc_CK2_ic_CK3_kd_CK1_kh_KW + kw; - - dst_data[didx] += src_data[sidx] * weights_data[widx]; - } - } - } - } - } - } - } - } - } - } -} - -class MKLDNNGraphDeconvolutionalTests: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - - - - - __SRC_DIMS__ - - - - - _IN_ - _OC_ - __DST_DIMS__ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - -protected: - std::string getModel(deconv_test_params p) { - std::string model = layers_t; - - std::string s_dims; - for (auto& dim : p.dims) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", s_dims); - - s_dims = ""; - int k_len = p.kernel.size(); - for (size_t i = 2; i < p.dims.size(); i++) { - size_t inx = k_len - i + 1; - size_t dim = p.strides[inx] * (p.dims[i] - 1) + p.kernel[inx] - 2 * p.pads_begin[inx]; - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__DST_DIMS__", s_dims); - REPLACE_WITH_NUM(model, "_IN_", p.dims[0]); - - if (!p.with_bias) REMOVE_LINE(model, ""); - - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.kernel); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.strides); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.pads_end); - REPLACE_WITH_NUM(model, "_GC_", p.grp_c); - REPLACE_WITH_NUM(model, "_OC_", p.out_c); - string auto_pad; - if (!p.auto_pad.empty()) auto_pad = string("auto_pad=") + string("\"") + p.auto_pad + string("\""); - REPLACE_WITH_STR(model, "_AP_", auto_pad); - - size_t blob_size = p.out_c * (p.dims[1] / p.grp_c); - for (auto k : p.kernel) { - blob_size *= k; - } - size_t w_data_size = blob_size * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - - size_t b_data_size = p.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - - model = IRTemplateGenerator::getIRTemplate("Deconvolution_Only", p.dims, "FP32", model, edges_t); - - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - deconv_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - size_t blob_size = p.out_c * (p.dims[1] / p.grp_c); - for (auto k : p.kernel) { - blob_size *= k; - } - InferenceEngine::SizeVector dims_weights = { blob_size }; - - std::vector blob_to_model; - InferenceEngine::Blob::Ptr weights = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, dims_weights, InferenceEngine::C }); - weights->allocate(); - fill_data(weights->buffer().as(), weights->size()); - blob_to_model.push_back(weights); - - 
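// Editorial note: the weights blob above and the bias blob created below are copied
// back-to-back into a single U8 model blob whose byte sizes correspond to the _S1_/_S2_
// values substituted into the IR by getModel(); core.ReadNetwork() then consumes that
// combined blob together with the generated model string.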
InferenceEngine::Blob::Ptr bias = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, {p.out_c}, InferenceEngine::C }); - bias->allocate(); - fill_data(bias->buffer().as(), bias->size()); - blob_to_model.push_back(bias); - - size_t total_size_in_bytes = 0; - for (InferenceEngine::Blob::Ptr blb : blob_to_model) total_size_in_bytes += blb->byteSize(); - - InferenceEngine::TBlob::Ptr model_blob = - InferenceEngine::make_shared_blob({ InferenceEngine::Precision::U8, {total_size_in_bytes}, InferenceEngine::C }); - model_blob->allocate(); - uint8_t* model_blob_ptr = model_blob->buffer().as(); - for (InferenceEngine::Blob::Ptr blb : blob_to_model) { - memcpy(model_blob_ptr, blb->buffer().as(), blb->byteSize()); - model_blob_ptr += blb->byteSize(); - } - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, model_blob)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Deconvolution) { - ASSERT_LE(p.num_prim_desc, node->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - bool good_prim = false; - for (auto & selected : p.selectedTypes) - if (selected == (node->getSelectedPrimitiveDescriptor()->getImplementationType() & selected)) - good_prim = true; - ASSERT_TRUE(good_prim); - } - } - - InferenceEngine::SizeVector dims_src = p.dims; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob( - {InferenceEngine::Precision::FP32, dims_src, InferenceEngine::TensorDesc::getLayoutByDims(p.dims)}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_deconv(*srcPtr, weights, bias, dst_ref, p); - - compare(*output, dst_ref, 0.0002f); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDeconvolutionalTests, TestsDeconvolution) {} - -// deconv_test_params(dims, kernel, strides, pads_begin, pads_end, out_c, grp_c, with_bias, auto_pad, num_prim_desc, -// selectedTypes, preferTypes, comp) - -size_t expected_num_prim_desc = InferenceEngine::with_cpu_x86_avx2() ? 
3 : 2; - - -INSTANTIATE_TEST_CASE_P( - TestDeconvolution, MKLDNNGraphDeconvolutionalTests, - ::testing::Values( - /*0*/ deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{3, 3, 3, 3}, {4, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}, - deconv_test_params{{2, 8, 5, 5}, {8, 8}, {4, 4}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}, - deconv_test_params{{2, 8, 5, 5}, {4, 8}, {2, 4}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}, - /*5*/ deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::ref_any}, - {MKLDNNPlugin::impl_desc_type::ref_any}} - )); - -INSTANTIATE_TEST_CASE_P( - TestDeconvolutionWithBias, MKLDNNGraphDeconvolutionalTests, - ::testing::Values( - /*0*/ deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{3, 3, 3, 3}, {4, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 8, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}, - deconv_test_params{{2, 128, 3, 3}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 128, 128, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}, - deconv_test_params{{2, 8, 5, 5}, {8, 8}, {4, 4}, {1, 1}, {0, 0}, 8, 8, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}, - /*5*/ deconv_test_params{{2, 8, 5, 5}, {4, 8}, {2, 4}, {1, 1}, {0, 0}, 8, 8, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}, - deconv_test_params{{2, 16, 5, 5}, {4, 8}, {2, 4}, {1, 1}, {0, 0}, 16, 16, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}, - deconv_test_params{{2, 8, 5, 5}, {1, 3}, {1, 1}, {0, 1}, {0, 1}, 8, 8, true, "", 2, - {MKLDNNPlugin::impl_desc_type::ref_any}, {MKLDNNPlugin::impl_desc_type::ref_any}}, - deconv_test_params{{1, 6, 6, 5}, {3, 1}, {1, 1}, {1, 0}, {1, 0}, 9, 3, true, "", 2, - {MKLDNNPlugin::impl_desc_type::ref_any}, {MKLDNNPlugin::impl_desc_type::ref_any}}, - deconv_test_params{{2, 24, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 3, true, "", - InferenceEngine::with_cpu_x86_avx2() ? 4ul : 3ul, - {MKLDNNPlugin::impl_desc_type::jit}}, - /*10*/ deconv_test_params{{2, 48, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 48, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}}, - deconv_test_params{{2, 48, 3, 3}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 192, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}}, - deconv_test_params{{2, 24, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 1, true, "", 3, {MKLDNNPlugin::impl_desc_type::jit}}, - deconv_test_params{{2, 72, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 72, 3, true, "", - InferenceEngine::with_cpu_x86_avx2() ? 
4ul : 3ul, - {MKLDNNPlugin::impl_desc_type::jit}}, - deconv_test_params{{1, 12, 2, 2}, {4, 4}, {2, 2}, {1, 1}, {1, 1}, 12, 12, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}}, -// In case of SSE oor pure AVX there is no JIT implementation -// deconv_test_params{{1, 32, 5, 5}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, 16, 1, true, "", -// 2, {MKLDNNPlugin::impl_desc_type::jit}}, - deconv_test_params{{1, 48, 3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, 96, 3, true, "", 2, {MKLDNNPlugin::impl_desc_type::jit}}, - // 5D - deconv_test_params{{1, 2, 8, 5, 5}, {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 4, 1, true, "", 4, - {MKLDNNPlugin::impl_desc_type::ref_any}, {MKLDNNPlugin::impl_desc_type::ref_any} }, - deconv_test_params{{1, 6, 5, 5, 5}, {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 9, 3, true, "", 2, - {MKLDNNPlugin::impl_desc_type::ref_any}, {MKLDNNPlugin::impl_desc_type::ref_any} }, - deconv_test_params{{2, 24, 5, 5, 5}, {4, 4, 4}, {2, 2, 1}, {1, 1, 1}, {0, 0, 0}, 24, 3, true, "", - InferenceEngine::with_cpu_x86_avx2() ? 4ul : 3ul, - {MKLDNNPlugin::impl_desc_type::jit}}, - deconv_test_params{{2, 48, 5, 5, 5}, {4, 4, 4}, {2, 2, 1}, {1, 1, 1}, {0, 0, 0}, 48, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}} - // Blocked, with biases - // TODO support on jit -// deconv_test_params{{2, 24, 5, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}}, -// deconv_test_params{{2, 24, 5, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 1, true, "", 3, {MKLDNNPlugin::impl_desc_type::jit}}, -// deconv_test_params{{2, 72, 5, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 72, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}} - )); - -#ifdef USE_MKL -INSTANTIATE_TEST_CASE_P( - MKLTestDeconvolution, MKLDNNGraphDeconvolutionalTests, - ::testing::Values( - deconv_test_params{{1, 3, 3, 3}, {4, 3}, {1, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{1, 3, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{4, 17, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 2, false, "", 3, {MKLDNNPlugin::impl_desc_type::gemm}}, - deconv_test_params{{1, 3, 3, 3}, {4, 3}, {1, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{1, 3, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{4, 17, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 2, true, "", 3, {MKLDNNPlugin::impl_desc_type::gemm}}, - deconv_test_params{{1, 6, 6, 5}, {3, 1}, {1, 1}, {1, 0}, {1, 0}, 9, 3, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm_blas}}, - deconv_test_params{{1, 64, 12, 12, 2}, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, {1, 0, 0}, 32, 1, true, "", 4, - {MKLDNNPlugin::impl_desc_type::gemm_blas, MKLDNNPlugin::impl_desc_type::jit_avx512 }}, - deconv_test_params{{1, 32, 12, 12, 2}, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, {1, 0, 0}, 16, 1, true, "", 4, - {MKLDNNPlugin::impl_desc_type::gemm_blas, MKLDNNPlugin::impl_desc_type::jit_avx512 } }, - deconv_test_params{{1, 25, 
1, 1, 1}, {4, 4, 4}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 64, 1, true, "valid", 3, - {MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{1, 32, 16, 16, 16}, {4, 4, 4}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, 1, 1, true, "same_upper", 3, - {MKLDNNPlugin::impl_desc_type::gemm_blas, MKLDNNPlugin::impl_desc_type::jit_avx512 } }, - deconv_test_params{{1, 64, 12, 12, 2}, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, {1, 0, 0}, 32, 1, true, "same_upper", 3, - {MKLDNNPlugin::impl_desc_type::gemm_blas, MKLDNNPlugin::impl_desc_type::jit_avx512 } }, - deconv_test_params{{1, 50, 1, 1, 1}, {4, 4, 4}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 128, 1, true, "", 3, - {MKLDNNPlugin::impl_desc_type::gemm_blas, MKLDNNPlugin::impl_desc_type::jit_avx512 }, - {MKLDNNPlugin::impl_desc_type::gemm_blas, MKLDNNPlugin::impl_desc_type::jit_avx512 }} )); -#endif - - -class MKLDNNGraphDynBatchDeconvolutionalTests: public MKLDNNGraphDeconvolutionalTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - deconv_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.dims[0]; - if (MB < 2) - MB = 2; - - size_t blob_size = 1; - for (auto k : p.kernel) { - blob_size *= k; - } - InferenceEngine::SizeVector dims_weights = {blob_size * p.out_c * (p.dims[1] / p.grp_c)}; - - std::vector blob_to_model; - InferenceEngine::Blob::Ptr weights = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, dims_weights, InferenceEngine::C }); - weights->allocate(); - fill_data(weights->buffer().as(), weights->size()); - blob_to_model.push_back(weights); - - InferenceEngine::Blob::Ptr bias = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, {p.out_c}, InferenceEngine::C }); - bias->allocate(); - fill_data(bias->buffer().as(), bias->size()); - blob_to_model.push_back(bias); - - size_t total_size_in_bytes = 0; - for (InferenceEngine::Blob::Ptr blb : blob_to_model) total_size_in_bytes += blb->byteSize(); - - InferenceEngine::TBlob::Ptr model_blob = - InferenceEngine::make_shared_blob({ InferenceEngine::Precision::U8, {total_size_in_bytes}, InferenceEngine::C }); - model_blob->allocate(); - uint8_t* model_blob_ptr = model_blob->buffer().as(); - for (InferenceEngine::Blob::Ptr blb : blob_to_model) { - memcpy(model_blob_ptr, blb->buffer().as(), blb->byteSize()); - model_blob_ptr += blb->byteSize(); - } - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, model_blob)); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob( - {InferenceEngine::Precision::FP32, p.dims, InferenceEngine::TensorDesc::getLayoutByDims(p.dims)}); - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; 
- - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkDeconvolution = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Deconvolution; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkDeconvolution, MKLDNNGraphTestClass::CheckDynBatchType::Child); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkDeconvolution, MKLDNNGraphTestClass::CheckDynBatchType::Child); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchDeconvolutionalTests, TestsDynBatchDeconvolutional) {} - -INSTANTIATE_TEST_CASE_P( - TestsDynBatchDeconvolutional, MKLDNNGraphDynBatchDeconvolutionalTests, - ::testing::Values( - // TODO: rewrite to ngraph to have reshape functionality - // deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 5, {MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{3, 3, 3, 3}, {4, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 5, {MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{ {2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw} }, - deconv_test_params{ {2, 8, 5, 5}, {8, 8}, {4, 4}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw} }, - deconv_test_params{ {2, 8, 5, 5}, {4, 8}, {2, 4}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw} } )); - -#ifdef USE_MKL -INSTANTIATE_TEST_CASE_P( - MKLTestsDynBatchDeconvolutional, MKLDNNGraphDynBatchDeconvolutionalTests, - ::testing::Values( - deconv_test_params{{1, 3, 3, 3}, {4, 3}, {1, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 4, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{1, 3, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 3, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{4, 17, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 3, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} }, - deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 2, false, "", 3, {MKLDNNPlugin::impl_desc_type::gemm}} )); -#endif diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp deleted file mode 100644 index e00f6da874b..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp +++ /dev/null @@ -1,456 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include -#include -#include - -using namespace MKLDNNPlugin; -using namespace mkldnn; -using namespace ::testing; - -using std::vector; -using std::function; - -constexpr auto depthwise_scale_shift = mkldnn::algorithm::depthwise_scale_shift; -constexpr auto depthwise_prelu = mkldnn::algorithm::depthwise_prelu; - -struct depthwise_test_params { - algorithm alg; - - // Formats: NC, CHW (actually NCH), NCHW, NCDHW - vector dims; - - bool isBroadcast; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type 
selectedType; - std::vector preferTypes; - - std::vector> comp; -}; - -template -void ref_depthwise(const InferenceEngine::TBlob &src, const data_t *weights, const size_t weightsSize, - InferenceEngine::TBlob &dst, depthwise_test_params prm) { - auto dims_size = src.getTensorDesc().getDims().size(); - - size_t MB = src.getTensorDesc().getDims()[0]; - size_t IC = src.getTensorDesc().getDims()[1]; - size_t ID = dims_size == 5 ? src.getTensorDesc().getDims()[2] : 1u; - size_t IH = dims_size < 3 ? 1 : dims_size == 3 ? src.getTensorDesc().getDims()[dims_size - 1] - : src.getTensorDesc().getDims()[dims_size - 2]; - size_t IW = dims_size < 4 ? 1 : src.getTensorDesc().getDims()[dims_size - 1]; - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights; - size_t bias_offset = prm.isBroadcast ? 1 : IC; - const data_t *bias_data = weights_data + bias_offset; - data_t *dst_data = dst.data(); - - size_t c1 = IH * IW; - size_t c2 = ID * c1; - size_t c3 = IC * c2; - for (int mb = 0; mb < MB; mb++) { - size_t m1 = mb * c3; - for (int c = 0; c < IC; c++) { - size_t m2 = m1 + c * c2; - for (int d = 0; d < ID; d++) { - size_t m3 = m2 + d * c1; - for (int h = 0; h < IH; h++) { - size_t m4 = m3 + h * IW; - for (int w = 0; w < IW; w++) { - int idx = m4 + w; - - int widx = prm.isBroadcast ? 0 : c; - int bidx = prm.isBroadcast ? 0 : c; - - if (prm.alg == depthwise_scale_shift) - dst_data[idx] = src_data[idx] * weights_data[widx] + bias_data[bidx]; - else if (prm.alg == depthwise_prelu) - dst_data[idx] = src_data[idx] > 0 ? src_data[idx] : src_data[idx]*weights_data[widx]; - } - } - } - } - } -} - -class MKLDNNGraphDepthwiseTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(depthwise_test_params p) { - std::string model = model_t; - auto dims_size = p.dims.size(); - - if (dims_size < 5) - REMOVE_LINE(model, "_ID_"); - if (dims_size < 4) - REMOVE_LINE(model, "_IW_"); - if (dims_size < 3) - REMOVE_LINE(model, "_IH_"); - - REPLACE_WITH_NUM(model, "_IN_", p.dims[0]); - REPLACE_WITH_NUM(model, "_IC_", p.dims[1]); - - if (dims_size > 2) - REPLACE_WITH_NUM(model, "_IH_", dims_size == 3 ? p.dims[dims_size - 1] : p.dims[dims_size - 2]); - if (dims_size > 3) - REPLACE_WITH_NUM(model, "_IW_", p.dims[dims_size - 1]); - if (dims_size > 4) - REPLACE_WITH_NUM(model, "_ID_", p.dims[dims_size - 3]); - - if (p.alg == depthwise_scale_shift) { - REPLACE_WITH_STR(model, "_LT_", "ScaleShift"); - REPLACE_WITH_STR(model, "_P_NAME_", "broadcast"); - REPLACE_WITH_NUM(model, "_P_VAL_", p.isBroadcast ? 1 : 0); - } - else if (p.alg == depthwise_prelu) { - REPLACE_WITH_STR(model, "_LT_", "PReLU"); - REPLACE_WITH_STR(model, "_P_NAME_", "channel_shared"); - REPLACE_WITH_NUM(model, "_P_VAL_", p.isBroadcast ? 1 : 0); - } - - size_t array_size = p.isBroadcast ? 
1 : p.dims[1]; - size_t w_data_size = array_size * sizeof(float); - size_t b_data_size = array_size * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - - return model; - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - depthwise_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - size_t weightSize = 2 * p.dims[1] * sizeof(float); - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {weightSize}, InferenceEngine::C }); - weights->allocate(); - fill_data( weights->data().as(), weights->size() / sizeof(float)); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Depthwise) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - InferenceEngine::SizeVector dims_src = p.dims; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.dims.size()) { - case 2: layout = InferenceEngine::NC; break; - // InferenceEngine::Layout doesn't have alias for 3D NCH layout so we use CHW instead - case 3: layout = InferenceEngine::CHW; break; - case 4: layout = InferenceEngine::NCHW; break; - case 5: layout = InferenceEngine::NCDHW; break; - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_depthwise(*srcPtr, weights->readOnly().as(), weights->size() / sizeof(float), dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDepthwiseTests, TestsDepthwise) {} - -const size_t num_2d_impl = InferenceEngine::with_cpu_x86_avx2() ? 
3 : 2; - -INSTANTIATE_TEST_CASE_P( - TestsDepthwise, MKLDNNGraphDepthwiseTests, - ::testing::Values( - // 2D - depthwise_test_params{depthwise_scale_shift, {128, 32}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {4, 3 }, true, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {1, 1 }, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {37, 35}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_prelu, {128, 32}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_prelu, {4, 3 }, true, num_2d_impl, jit}, - depthwise_test_params{depthwise_prelu, {1, 1 }, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_prelu, {37, 35}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {128, 32}, false, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_scale_shift, {4, 3 }, true, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_scale_shift, {1, 1 }, false, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_prelu, {128, 32}, false, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_prelu, {4, 3 }, true, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_prelu, {1, 1 }, false, num_2d_impl, ref, {ref_any}}, - // 4D - depthwise_test_params{depthwise_scale_shift, {1, 32, 128, 256}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {4, 3, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {1, 32, 128, 256}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {1, 32, 128, 256}, false,3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {4, 3, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {1, 32, 128, 256}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {1, 32, 128, 256}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_scale_shift, {4, 3, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_scale_shift, {1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_scale_shift, {4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_scale_shift, {1, 32, 128, 256}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {1, 32, 128, 256}, false,3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {4, 3, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - 
depthwise_test_params{depthwise_prelu, {4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {1, 32, 128, 256}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // 5D - depthwise_test_params{depthwise_scale_shift, {1, 32, 16, 128, 256}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {4, 3, 16, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {1, 1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {4, 4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {1, 32, 16, 128, 256}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_scale_shift, {4, 3, 16, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {1, 32, 16, 128, 256}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {4, 3, 16, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {1, 1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {4, 4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {1, 32, 16, 128, 256}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {4, 3, 16, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_scale_shift, {1, 1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_scale_shift, {4, 4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {1, 1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {4, 4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}} - )); - -INSTANTIATE_TEST_CASE_P( - TestsDepthwise3D, MKLDNNGraphDepthwiseTests, - ::testing::Values( - depthwise_test_params{depthwise_scale_shift, {1, 32, 16}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {8, 32, 16}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {4, 3, 2}, true, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {1, 1, 1}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {37, 35, 17}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_prelu, {128, 32, 19}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_prelu, {4, 3, 2}, true, num_2d_impl, jit}, - depthwise_test_params{depthwise_prelu, {1, 1, 1}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_prelu, {37, 35, 17}, false, num_2d_impl, jit}, - depthwise_test_params{depthwise_scale_shift, {128, 32, 19}, false, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_scale_shift, {4, 3, 2}, true, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_scale_shift, {1, 1, 1}, false, num_2d_impl, ref, {ref_any}}, - 
depthwise_test_params{depthwise_prelu, {128, 32, 17}, false, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_prelu, {4, 3, 19}, true, num_2d_impl, ref, {ref_any}}, - depthwise_test_params{depthwise_prelu, {1, 1, 1}, false, num_2d_impl, ref, {ref_any}} - )); - -class MKLDNNGraphDynBatchDepthwiseTests: public MKLDNNGraphDepthwiseTests { -protected: - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - depthwise_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.dims[0]; - if (MB < 2) - MB = 2; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {p.dims[1] * 4 * sizeof(float)}, InferenceEngine::C }); - weights->allocate(); - fill_data( weights->data().as(), weights->size() / sizeof(float)); - float * data = weights->buffer(); - for (size_t i = 0; i < weights->size() / sizeof(float); i++) { - if (data[i] < 0) { - data[i] *= -1; - } - } - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = p.dims; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, layout}); - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkDepthwise = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Depthwise; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkDepthwise); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkDepthwise); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchDepthwiseTests, TestsDynBatchDepthwise) {} - -INSTANTIATE_TEST_CASE_P( - TestsDynBatchDepthwise, MKLDNNGraphDynBatchDepthwiseTests, - ::testing::Values( - depthwise_test_params{depthwise_scale_shift, {4, 3, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {4, 3, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_prelu, {4, 4, 10, 
10}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - depthwise_test_params{depthwise_scale_shift, {4, 3, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_scale_shift, {4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {4, 3, 228, 228}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - depthwise_test_params{depthwise_prelu, {4, 4, 10, 10}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}} - // TODO: rewrite to ngraph to have reshape functionality - // depthwise_test_params{depthwise_scale_shift, {1, 32, 128, 256}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - // depthwise_test_params{depthwise_scale_shift, {1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - // depthwise_test_params{depthwise_scale_shift, {1, 4, 5, 5}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - // depthwise_test_params{depthwise_scale_shift, {1, 32, 128, 256}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - // depthwise_test_params{depthwise_prelu, {1, 32, 128, 256}, false,3, MKLDNNPlugin::impl_desc_type::jit}, - // depthwise_test_params{depthwise_prelu, {1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - // depthwise_test_params{depthwise_prelu, {1, 4, 5, 5}, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - // depthwise_test_params{depthwise_prelu, {1, 32, 128, 256}, true, 3, MKLDNNPlugin::impl_desc_type::jit}, - // depthwise_test_params{depthwise_scale_shift, {1, 32, 128, 256}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // depthwise_test_params{depthwise_scale_shift, {1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // depthwise_test_params{depthwise_scale_shift, {1, 4, 5, 5}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // depthwise_test_params{depthwise_scale_shift, {1, 32, 128, 256}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // depthwise_test_params{depthwise_prelu, {1, 32, 128, 256}, false,3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // depthwise_test_params{depthwise_prelu, {1, 1, 1, 1}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // depthwise_test_params{depthwise_prelu, {1, 4, 5, 5}, false, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // depthwise_test_params{depthwise_prelu, {1, 32, 128, 256}, true, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}} - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp deleted file mode 100644 index 6eb2b52ea88..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#ifndef NOMINMAX -#define NOMINMAX -#endif - -#include "test_graph.hpp" - -#include -#include "common_test_utils/data_utils.hpp" -#include "single_layer_common.hpp" -#include -#include -#include "tests_common.hpp" -#include - -using namespace ::testing; -using namespace std; 
-using namespace mkldnn; - -struct eltwise_test_params { - // Formats: NCHW, NCDHW - vector dims1; - vector dims2; - vector dims3; - - enum opType { - Sum = 0, Prod, Max, Min, Sub, Div, Squared_diff, Floor_mod, Pow, - Logical_AND, Logical_OR, Logical_XOR, - Less, Less_equal, Greater, Greater_equal, Equal, Not_equal - }; - - opType op; - - std::string scales; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - -template -void ref_eltwise(const std::vector> &src, InferenceEngine::TBlob &dst, eltwise_test_params prm) { - std::vector scales; - if (prm.scales != "") { - std::istringstream stream(prm.scales); - stream.imbue(std::locale("C")); - std::string str; - while (getline(stream, str, ',')) { - float val = InferenceEngine::CNNLayer::ie_parse_float(str); - scales.push_back(val); - } - } else { - for (int i = 0; i < src.size(); i++) { - scales.push_back(1.0f); - } - } - - data_t *dst_data = dst.data(); - - const data_t *src_data = src[0].readOnly(); - auto& dims = dst.getTensorDesc().getDims(); - auto& dims0 = src[0].getTensorDesc().getDims(); - - int offset_in[5] = {1, 1, 1, 1, 1}; - int offset_out[5] = {1, 1, 1, 1, 1}; - - for (int i = 0; i < dims0.size(); i++) - offset_in[5 - dims0.size() + i] = dims0[i]; - for (int i = 0; i < dims.size(); i++) - offset_out[5 - dims.size() + i] = dims[i]; - - unsigned long j = 0, k = 0; - - for (int i0 = 0; i0 < offset_out[0]; i0++) { - if (i0 > offset_in[0] - 1) { - k -= offset_in[1]*offset_in[2]*offset_in[3]*offset_in[4]; - } - for (int i1 = 0; i1 < offset_out[1]; i1++) { - if (i1 > offset_in[1] - 1) { - k -= offset_in[2]*offset_in[3]*offset_in[4]; - } - for (int i2 = 0; i2 < offset_out[2]; i2++) { - if (i2 > offset_in[2] - 1) { - k -= offset_in[3]*offset_in[4]; - } - for (int i3 = 0; i3 < offset_out[3]; i3++) { - if (i3 > offset_in[3] - 1) { - k -= offset_in[4]; - } - for (int i4 = 0; i4 < offset_out[4]; i4++) { - if (i4 > offset_in[4] - 1) { - k -= 1; - } - if (prm.op == eltwise_test_params::Sum) { - dst_data[j++] = scales[0] * src_data[k++]; - } else { - dst_data[j++] = src_data[k++]; - } - } - } - } - } - } - - for (int n = 1; n < src.size(); n++) { - j = 0; - k = 0; - src_data = src[n].readOnly(); - auto& dims1 = src[n].getTensorDesc().getDims(); - int offset_in1[5] = {1, 1, 1, 1, 1}; - for (int i = 0; i < dims1.size(); i++) - offset_in1[5 - dims1.size() + i] = dims1[i]; - - for (int i0 = 0; i0 < offset_out[0]; i0++) { - if (i0 > offset_in1[0] - 1) { - k -= offset_in1[1]*offset_in1[2]*offset_in1[3]*offset_in1[4]; - } - for (int i1 = 0; i1 < offset_out[1]; i1++) { - if (i1 > offset_in1[1] - 1) { - k -= offset_in1[2]*offset_in1[3]*offset_in1[4]; - } - for (int i2 = 0; i2 < offset_out[2]; i2++) { - if (i2 > offset_in1[2] - 1) { - k -= offset_in1[3]*offset_in1[4]; - } - for (int i3 = 0; i3 < offset_out[3]; i3++) { - if (i3 > offset_in1[3] - 1) { - k -= offset_in1[4]; - } - for (int i4 = 0; i4 < offset_out[4]; i4++, j++, k++) { - if (i4 > offset_in1[4] - 1) { - k -= 1; - } - switch (prm.op) { - case eltwise_test_params::Sum: - dst_data[j] += scales[n] * src_data[k]; - break; - case eltwise_test_params::Sub: - dst_data[j] = dst_data[j] - src_data[k]; - break; - case eltwise_test_params::Min: - dst_data[j] = (std::min)(dst_data[j], src_data[k]); - break; - case eltwise_test_params::Max: - dst_data[j] = (std::max)(dst_data[j], src_data[k]); - break; - case eltwise_test_params::Prod: - dst_data[j] = dst_data[j] * src_data[k]; - break; - case eltwise_test_params::Div: - dst_data[j] = dst_data[j] / 
src_data[k]; - break; - case eltwise_test_params::Squared_diff: - dst_data[j] = (dst_data[j] - src_data[k]) * (dst_data[j] - src_data[k]); - break; - case eltwise_test_params::Logical_OR: - dst_data[j] = dst_data[j] || src_data[k]; - break; - case eltwise_test_params::Logical_AND: - dst_data[j] = dst_data[j] && src_data[k]; - break; - case eltwise_test_params::Logical_XOR: - dst_data[j] = (dst_data[j] || src_data[k]) - (dst_data[j] && src_data[k]); - break; - case eltwise_test_params::Less: - dst_data[j] = dst_data[j] < src_data[k]; - break; - case eltwise_test_params::Less_equal: - dst_data[j] = dst_data[j] <= src_data[k]; - break; - case eltwise_test_params::Greater: - dst_data[j] = dst_data[j] > src_data[k]; - break; - case eltwise_test_params::Greater_equal: - dst_data[j] = dst_data[j] >= src_data[k]; - break; - case eltwise_test_params::Equal: - dst_data[j] = dst_data[j] == src_data[k]; - break; - case eltwise_test_params::Not_equal: - dst_data[j] = dst_data[j] != src_data[k]; - break; - case eltwise_test_params::Pow: - dst_data[j] = std::pow(dst_data[j], src_data[k]); - break; - case eltwise_test_params::Floor_mod: - dst_data[j] = dst_data[j] - dst_data[j] / src_data[k] * src_data[k]; - break; - } - } - } - } - } - } - } -} - -std::string select_op(eltwise_test_params::opType op) { - std::string str_op; - switch(op){ - case eltwise_test_params::opType::Sum: - str_op = "sum"; - break; - case eltwise_test_params::opType::Prod: - str_op = "prod"; - break; - case eltwise_test_params::opType::Max: - str_op = "max"; - break; - case eltwise_test_params::opType::Min: - str_op = "min"; - break; - case eltwise_test_params::opType::Sub: - str_op = "sub"; - break; - case eltwise_test_params::opType::Div: - str_op = "div"; - break; - case eltwise_test_params::opType::Squared_diff: - str_op = "squared_diff"; - break; - case eltwise_test_params::opType::Logical_AND: - str_op = "logical_and"; - break; - case eltwise_test_params::opType::Logical_OR: - str_op = "logical_or"; - break; - case eltwise_test_params::opType::Logical_XOR: - str_op = "logical_xor"; - break; - case eltwise_test_params::opType ::Less: - str_op = "less"; - break; - case eltwise_test_params::opType::Less_equal: - str_op = "less_equal"; - break; - case eltwise_test_params::opType::Greater: - str_op = "greater"; - break; - case eltwise_test_params::opType::Greater_equal: - str_op = "greater_equal"; - break; - case eltwise_test_params::opType::Equal: - str_op = "equal"; - break; - case eltwise_test_params::opType::Not_equal: - str_op = "not_equal"; - break; - case eltwise_test_params::opType::Pow: - str_op = "pow"; - break; - case eltwise_test_params::opType::Floor_mod: - str_op = "floor_mod"; - break; - } - return str_op; -} - -struct precisions_test_2params { - struct { - std::string precision0; - std::string precision1; - } in; - - size_t num_nodes; - size_t num_reorder_nodes; -}; - -class MKLDNNGraphEltwise2PrecisionsTests : public TestsCommon, - public WithParamInterface { - - std::string model_t = R"V0G0N( - - - - - - 1 - 2 - 3 - - - - - - - 1 - 2 - 3 - - - - - - - - 1 - 2 - 3 - - - 1 - 2 - 3 - - - - - 1 - 2 - 3 - - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(precisions_test_2params p) { - std::string model = model_t; - - REPLACE_WITH_STR(model, "_IP0_", p.in.precision0); - REPLACE_WITH_STR(model, "_IP1_", p.in.precision1); - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - precisions_test_2params p = ::testing::WithParamInterface::GetParam(); - 
std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - ASSERT_NO_THROW(graph.CreateGraph(network)); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - ASSERT_EQ(nodes.size(), p.num_nodes); - - size_t actual_reorder_nodes = 0; - for (size_t i = 0; i < nodes.size(); i++) { - if(nodes[i].get()->getType() == MKLDNNPlugin::Type::Reorder && - FIND_STR(nodes[i].get()->getName(), "_U8_FP32_")) - actual_reorder_nodes ++; - } - ASSERT_EQ(actual_reorder_nodes, p.num_reorder_nodes); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphEltwise2PrecisionsTests, TestsEltwise2Precisions) {} - -INSTANTIATE_TEST_CASE_P( - TestsEltwise2Precisions, MKLDNNGraphEltwise2PrecisionsTests, - ::testing::Values( - precisions_test_2params{ {"FP32", "FP32"}, 4, 0 }, - precisions_test_2params{ { "U8", "FP32"}, 4, 0 }, - precisions_test_2params{ {"FP32", "U8"}, 4, 0 }, - precisions_test_2params{ { "U8", "U8"}, 4, 0 } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp deleted file mode 100644 index 871e8f402d0..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp +++ /dev/null @@ -1,337 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct fc_test_params { - // Formats: NCHW, NCDHW - vector in_dims; - - size_t out_c; - - size_t num_prim_desc; - - int selectedType; - std::vector preferTypes; - - std::vector> comp; -}; - - -template -void ref_innerproduct(const InferenceEngine::TBlob &src, const data_t *weights, const size_t weightsSize, - InferenceEngine::TBlob &dst, fc_test_params prm) { - auto dims_size = src.getTensorDesc().getDims().size(); - - size_t IB = src.getTensorDesc().getDims()[0]; - size_t IC = src.getTensorDesc().getDims()[1]; - size_t ID = dims_size == 5 ? 
src.getTensorDesc().getDims()[dims_size - 3] : 1u; - size_t IH = src.getTensorDesc().getDims()[dims_size - 2]; - size_t IW = src.getTensorDesc().getDims()[dims_size - 1]; - - size_t OC = prm.out_c; - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights; - const data_t *bias_data = weights_data + IW*IH*ID*IC*OC; - data_t *dst_data = dst.data(); - - IE_ASSERT( IW*IH*ID*IC*OC + OC == weightsSize ); - IE_ASSERT( OC == dst.getTensorDesc().getDims()[1] ); - - for (size_t n = 0; n < IB; n++) { - for (size_t oc = 0; oc < OC; oc++) { - dst_data[n*OC + oc] = bias_data[oc]; - for (size_t ic = 0; ic < IC; ic++) { - for (size_t kd = 0; kd < ID; kd++) { - for (size_t kh = 0; kh < IH; kh++) { - for (size_t kw = 0; kw < IW; kw++) { - size_t iidx = n * IC * ID * IH * IW - + ic * ID * IH * IW - + kd * IH * IW - + kh * IW - + kw; - size_t widx = oc * IC * ID * IH * IW - + ic * ID * IH * IW - + kd * IH * IW - + kh * IW - + kw; - - dst_data[n*OC + oc] += src_data[iidx] * weights_data[widx]; - } - } - } - } - } - } -} - -class MKLDNNGraphFullyConnectedTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - __SRC_DIMS__ - - - - - - - - - - - __SRC_DIMS__ - - - - - _IN_ - _OC_ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(fc_test_params p) { - std::string model = model_t; - std::string s_dims; - for (auto& dim : p.in_dims) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", s_dims); - - REPLACE_WITH_NUM(model, "_IN_", p.in_dims[0]); - REPLACE_WITH_NUM(model, "_OC_", p.out_c); - - size_t w_data_size = p.out_c * sizeof(float); - for (int i = 1; i < p.in_dims.size(); i++) - w_data_size *= p.in_dims[i]; - size_t b_data_size = p.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_S1_", w_data_size); - REPLACE_WITH_NUM(model, "_S2_", b_data_size); - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - fc_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - size_t weights_size = p.out_c; - for (int i = 1; i < p.in_dims.size(); i++) { - weights_size *= p.in_dims[i]; - } - weights_size = (weights_size + p.out_c) * sizeof(float); - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {weights_size}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::FullyConnected) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, 
nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - InferenceEngine::SizeVector dims_src = p.in_dims; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.in_dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_innerproduct(*srcPtr, (const float *)weights->buffer(), weights->size() / sizeof(float), dst_ref, p); - - compare(*output, dst_ref, 0.9f); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphFullyConnectedTests, TestsFullyConnected) {} - - -INSTANTIATE_TEST_CASE_P( - TestsFullyConnected, MKLDNNGraphFullyConnectedTests, - ::testing::Values( - fc_test_params{{1, 3, 227, 227}, 96, 6, MKLDNNPlugin::impl_desc_type::gemm }, - fc_test_params{{1, 4, 227, 227}, 8, 6, MKLDNNPlugin::impl_desc_type::gemm }, - fc_test_params{{1, 4, 227, 227}, 10, 6, MKLDNNPlugin::impl_desc_type::gemm }, - fc_test_params{{1, 3, 227, 227}, 96, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - fc_test_params{{1, 4, 227, 227}, 8, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - fc_test_params{{1, 4, 227, 227}, 10, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - //5D - fc_test_params{{1, 4, 32, 32, 32}, 10, 6, MKLDNNPlugin::impl_desc_type::gemm }, - fc_test_params{{1, 3, 32, 32, 32}, 96, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}})); - -class MKLDNNGraphDynBatchFullyConnectedTests: public MKLDNNGraphFullyConnectedTests { - virtual void SetUp() { - try { - TestsCommon::SetUp(); - fc_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.in_dims[0]; - if (MB < 2) - MB = 2; - - size_t weights_size = p.out_c; - for (int i = 1; i < p.in_dims.size(); i++) { - weights_size *= p.in_dims[i]; - } - weights_size = (weights_size + p.out_c) * sizeof(float); - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {weights_size}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - 
ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = p.in_dims; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.in_dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkFC = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::FullyConnected; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkFC); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkFC); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchFullyConnectedTests, TestsDynBatchFullyConnected) {} - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsDynBatchFullyConnected, MKLDNNGraphDynBatchFullyConnectedTests, - ::testing::Values( - fc_test_params{{1, 3, 227, 227}, 96, 6, MKLDNNPlugin::impl_desc_type::gemm }, - fc_test_params{{1, 4, 227, 227}, 8, 6, MKLDNNPlugin::impl_desc_type::gemm }, - fc_test_params{{1, 4, 227, 227}, 10, 6, MKLDNNPlugin::impl_desc_type::gemm }, - fc_test_params{{1, 3, 227, 227}, 96, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - fc_test_params{{1, 4, 227, 227}, 8, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - fc_test_params{{1, 4, 227, 227}, 10, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp deleted file mode 100644 index 7ec1753214d..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp +++ /dev/null @@ -1,662 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct gemm_test_params { - struct { - size_t MB1_A; - size_t MB2_A; - size_t MB1_B; - size_t MB2_B; - size_t MB1_C; - size_t MB2_C; - size_t MB1_D; - size_t MB2_D; - } batches; - - size_t M; - size_t N; - size_t K; - - float alpha; - float beta; - - bool transposeA; - bool transposeB; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - -template -void ref_gemm(const std::vector> &src, InferenceEngine::TBlob &dst, - 
gemm_test_params prm) { - const data_t *src0_data = src[0].readOnly(); - const data_t *src1_data = src[1].readOnly(); - const data_t *src2_data = src.size() == 3 ? src[2].readOnly() : dst.readOnly(); - data_t *dst_data = dst.data(); - - size_t MB1 = prm.batches.MB1_D; - size_t MB2 = prm.batches.MB2_D; - size_t M = prm.M; - size_t N = prm.N; - size_t K = prm.K; - - for (int mb1 = 0; mb1 < MB1; mb1++) { - const data_t *a_data = src0_data; - const data_t *b_data = src1_data; - const data_t *c_data = src2_data; - data_t *d_data = dst_data; - - for (int mb2 = 0; mb2 < MB2; mb2++) { - for (int i = 0; i < M; i++) { - for (int j = 0; j < N; j++) { - d_data[i * N + j] = src.size() == 3 ? prm.beta * c_data[i * N + j] : 0; - - for (int k = 0; k < K; k++) { - size_t src0_off = prm.transposeA ? k * M + i : i * K + k; - size_t src1_off = prm.transposeB ? j * K + k : k * N + j; - d_data[i * N + j] += prm.alpha * a_data[src0_off] * b_data[src1_off]; - } - } - } - a_data += prm.batches.MB2_A == MB2 ? M*K : 0; - b_data += prm.batches.MB2_B == MB2 ? K*N : 0; - c_data += prm.batches.MB2_C == MB2 ? M*N : 0; - d_data += M*N; - } - - src0_data += prm.batches.MB1_A == MB1 ? prm.batches.MB2_A*M*K : 0; - src1_data += prm.batches.MB1_B == MB1 ? prm.batches.MB2_B*K*N : 0; - src2_data += prm.batches.MB1_C == MB1 ? prm.batches.MB2_C*M*N : 0; - dst_data += prm.batches.MB2_D*M*N; - } -} - -class MKLDNNGraphGemmTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _MB1_A_ - _MB2_A_ - _M_A_ - _N_A_ - - - - - - - _MB1_B_ - _MB2_B_ - _M_B_ - _N_B_ - - - - - - - _MB1_C_ - _MB2_C_ - _M_ - _N_ - - - - - - - - _MB1_A_ - _MB2_A_ - _M_A_ - _N_A_ - - - _MB1_B_ - _MB2_B_ - _M_B_ - _N_B_ - - - _MB1_C_ - _MB2_C_ - _M_ - _N_ - - - - - _MB1_D_ - _MB2_D_ - _M_ - _N_ - - - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(gemm_test_params p) { - std::string model = model_t; - std::string op; - - REPLACE_WITH_NUM(model, "_MB1_A_", p.batches.MB1_A); - REPLACE_WITH_NUM(model, "_MB2_A_", p.batches.MB2_A); - REPLACE_WITH_NUM(model, "_MB1_B_", p.batches.MB1_B); - REPLACE_WITH_NUM(model, "_MB2_B_", p.batches.MB2_B); - REPLACE_WITH_NUM(model, "_MB1_C_", p.batches.MB1_C); - REPLACE_WITH_NUM(model, "_MB2_C_", p.batches.MB2_C); - REPLACE_WITH_NUM(model, "_MB1_D_", p.batches.MB1_D); - REPLACE_WITH_NUM(model, "_MB2_D_", p.batches.MB2_D); - - auto m_A = p.transposeA ? p.K : p.M; - auto n_A = p.transposeA ? p.M : p.K; - auto m_B = p.transposeB ? p.N : p.K; - auto n_B = p.transposeB ? 
p.K : p.N; - - REPLACE_WITH_NUM(model, "_M_A_", m_A); - REPLACE_WITH_NUM(model, "_N_A_", n_A); - REPLACE_WITH_NUM(model, "_M_B_", m_B); - REPLACE_WITH_NUM(model, "_N_B_", n_B); - - REPLACE_WITH_NUM(model, "_M_", p.M); - REPLACE_WITH_NUM(model, "_N_", p.N); - REPLACE_WITH_NUM(model, "_K_", p.K); - - REPLACE_WITH_NUM(model, "_A_", p.alpha); - REPLACE_WITH_NUM(model, "_B_", p.beta); - REPLACE_WITH_NUM(model, "_TA_", p.transposeA); - REPLACE_WITH_NUM(model, "_TB_", p.transposeB); - - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - gemm_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Gemm) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType()); - } - } - - auto m_A = p.transposeA ? p.K : p.M; - auto n_A = p.transposeA ? p.M : p.K; - auto m_B = p.transposeB ? p.N : p.K; - auto n_B = p.transposeB ? p.K : p.N; - - InferenceEngine::SizeVector dims_src1 = {p.batches.MB1_A, p.batches.MB2_A, m_A, n_A}; - InferenceEngine::SizeVector dims_src2 = {p.batches.MB1_B, p.batches.MB2_B, m_B, n_B}; - InferenceEngine::SizeVector dims_src3 = {p.batches.MB1_C, p.batches.MB2_C, p.M, p.N}; - InferenceEngine::SizeVector dims_dst = {p.batches.MB1_D, p.batches.MB2_D, p.M, p.N}; - - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::NCHW}); - src1->allocate(); - InferenceEngine::TBlob* srcPtr1 = dynamic_cast*>(src1.get()); - if (srcPtr1 == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - fill_data(src1->buffer(), src1->size()); - - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::NCHW}); - src2->allocate(); - InferenceEngine::TBlob* srcPtr2 = dynamic_cast*>(src2.get()); - if (srcPtr2 == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - fill_data(src2->buffer(), src2->size()); - - InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src3, InferenceEngine::NCHW}); - src3->allocate(); - InferenceEngine::TBlob* srcPtr3 = dynamic_cast*>(src3.get()); - if (srcPtr3 == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - fill_data(src3->buffer(), src3->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - srcs.insert(std::pair("in3", src3)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - 
dst_ref.allocate(); - - std::vector> src_vec = {*srcPtr1, *srcPtr2, *srcPtr3}; - - ref_gemm(src_vec, dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphGemmTests, TestsGemm) {} - -INSTANTIATE_TEST_CASE_P( - TestsGemm, MKLDNNGraphGemmTests, - ::testing::Values( - gemm_test_params{{2, 1, 2, 1, 2, 1, 2, 1}, 3, 3, 2, 1, 1, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::gemm_any, impl.getImplementationType()); - ASSERT_EQ(3, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - } }, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 8, 5, 4, 1, 1, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::gemm_any, impl.getImplementationType()); - ASSERT_EQ(3, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - } }, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 16, 10, 12, 1, 1, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::gemm_any, impl.getImplementationType()); - ASSERT_EQ(3, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - } }, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 11, 10, 20, 1, 1, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::gemm_any, impl.getImplementationType()); - ASSERT_EQ(3, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - } }, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 5, 13, 2, 1, 1, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::gemm_any, impl.getImplementationType()); - ASSERT_EQ(3, 
impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - } }, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 5, 15, 10, 1, 1, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::gemm_any, impl.getImplementationType()); - ASSERT_EQ(3, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - } }, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 5, 6, 7, 2, 0, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 5, 6, 7, 0, 2, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 3, 7, 4, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 7, 3, 4, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 7, 4, 3, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 3, 7, 4, 2, 3, false, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 7, 3, 4, 2, 3, false, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 7, 4, 3, 2, 3, false, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 3, 7, 4, 2, 3, true, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 7, 3, 4, 2, 3, true, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{3, 2, 3, 2, 3, 2, 3, 2}, 7, 4, 3, 2, 3, true, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 2, 3, 2, 3, 2, 3}, 7, 4, 3, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 2, 3, 1, 3, 2, 3}, 7, 4, 3, 2, 3, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{2, 3, 1, 3, 1, 3, 2, 3}, 7, 4, 3, 2, 3, false, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{5, 3, 5, 1, 5, 3, 5, 3}, 7, 4, 3, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{5, 3, 5, 1, 5, 1, 5, 3}, 7, 4, 3, 2, 3, false, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{5, 1, 5, 1, 5, 3, 5, 3}, 7, 4, 3, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 1, 5, 3, 5, 3, 5, 3}, 7, 4, 3, 2, 3, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 1, 1, 1, 5, 3, 5, 3}, 7, 4, 3, 2, 3, true, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{5, 4, 1, 1, 1, 1, 5, 4}, 7, 4, 3, 2, 3, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any} - )); - -class 
MKLDNNGraphDynBatchGemmTests: public MKLDNNGraphGemmTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - gemm_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.batches.MB1_D; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - auto m_A = p.transposeA ? p.K : p.M; - auto n_A = p.transposeA ? p.M : p.K; - auto m_B = p.transposeB ? p.N : p.K; - auto n_B = p.transposeB ? p.K : p.N; - - InferenceEngine::SizeVector dims_src1 = {MB, p.batches.MB2_A, m_A, n_A}; - InferenceEngine::SizeVector dims_src2 = {MB, p.batches.MB2_B, m_B, n_B}; - InferenceEngine::SizeVector dims_src3 = {MB, p.batches.MB2_C, p.M, p.N}; - InferenceEngine::SizeVector dims_dst = {MB, p.batches.MB2_D, p.M, p.N}; - - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::NCHW}); - src1->allocate(); - InferenceEngine::TBlob* srcPtr1 = dynamic_cast*>(src1.get()); - if (srcPtr1 == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - fill_data(src1->buffer(), src1->size()); - - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::NCHW}); - src2->allocate(); - InferenceEngine::TBlob* srcPtr2 = dynamic_cast*>(src2.get()); - if (srcPtr2 == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - fill_data(src2->buffer(), src2->size()); - - InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src3, InferenceEngine::NCHW}); - src3->allocate(); - InferenceEngine::TBlob* srcPtr3 = dynamic_cast*>(src3.get()); - if (srcPtr3 == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - fill_data(src3->buffer(), src3->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - srcs.insert(std::pair("in3", src3)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto check = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Gemm; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, check); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, check); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchGemmTests, TestsDynBatchGemm) {} - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsDynBatchGemm, MKLDNNGraphDynBatchGemmTests, - ::testing::Values( - gemm_test_params{{1, 3, 1, 3, 1, 3, 1, 3}, 3, 3, 3, 1, 1, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - 
gemm_test_params{{1, 3, 1, 1, 1, 3, 1, 3}, 16, 15, 12, 1, 1, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any} -)); - -class MKLDNNGraphSingleBatchDimGemmTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _MB_A_ - _M_A_ - _N_A_ - - - - - - - _MB_B_ - _M_B_ - _N_B_ - - - - - - - - _MB_A_ - _M_A_ - _N_A_ - - - _MB_B_ - _M_B_ - _N_B_ - - - - - _MB_D_ - _M_ - _N_ - - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(gemm_test_params p) { - std::string model = model_t; - std::string op; - - REPLACE_WITH_NUM(model, "_MB_A_", p.batches.MB2_A); - REPLACE_WITH_NUM(model, "_MB_B_", p.batches.MB2_B); - REPLACE_WITH_NUM(model, "_MB_D_", p.batches.MB2_D); - - auto m_A = p.transposeA ? p.K : p.M; - auto n_A = p.transposeA ? p.M : p.K; - auto m_B = p.transposeB ? p.N : p.K; - auto n_B = p.transposeB ? p.K : p.N; - - REPLACE_WITH_NUM(model, "_M_A_", m_A); - REPLACE_WITH_NUM(model, "_N_A_", n_A); - REPLACE_WITH_NUM(model, "_M_B_", m_B); - REPLACE_WITH_NUM(model, "_N_B_", n_B); - - REPLACE_WITH_NUM(model, "_M_", p.M); - REPLACE_WITH_NUM(model, "_N_", p.N); - REPLACE_WITH_NUM(model, "_K_", p.K); - - REPLACE_WITH_NUM(model, "_A_", p.alpha); - REPLACE_WITH_NUM(model, "_B_", p.beta); - REPLACE_WITH_NUM(model, "_TA_", p.transposeA); - REPLACE_WITH_NUM(model, "_TB_", p.transposeB); - - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - gemm_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Gemm) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType()); - } - } - - auto m_A = p.transposeA ? p.K : p.M; - auto n_A = p.transposeA ? p.M : p.K; - auto m_B = p.transposeB ? p.N : p.K; - auto n_B = p.transposeB ? 
p.K : p.N; - - InferenceEngine::SizeVector dims_src1 = {p.batches.MB2_A, m_A, n_A}; - InferenceEngine::SizeVector dims_src2 = {p.batches.MB2_B, m_B, n_B}; - InferenceEngine::SizeVector dims_dst = {p.batches.MB2_D, p.M, p.N}; - - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::CHW}); - src1->allocate(); - InferenceEngine::TBlob* srcPtr1 = dynamic_cast*>(src1.get()); - if (srcPtr1 == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - fill_data(src1->buffer(), src1->size()); - - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::CHW}); - src2->allocate(); - InferenceEngine::TBlob* srcPtr2 = dynamic_cast*>(src2.get()); - if (srcPtr2 == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - fill_data(src2->buffer(), src2->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src1)); - srcs.insert(std::pair("in2", src2)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - std::vector> src_vec = {*srcPtr1, *srcPtr2}; - - ref_gemm(src_vec, dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphSingleBatchDimGemmTests, TestsGemm) {} - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsGemm, MKLDNNGraphSingleBatchDimGemmTests, - ::testing::Values( - gemm_test_params{{1, 1, 1, 1, 1, 1, 1, 1}, 7, 4, 3, 2, 3, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 1, 3, 1, 1, 1, 3}, 7, 4, 3, 2, 3, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 1, 1, 1, 1, 1, 3}, 7, 4, 3, 2, 3, false, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 1, 1, 1, 1, 1, 1, 1}, 7, 4, 3, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 1, 3, 1, 1, 1, 3}, 7, 4, 3, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 1, 1, 1, 1, 1, 3}, 7, 4, 3, 2, 3, true, false, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 1, 1, 1, 1, 1, 1, 1}, 7, 4, 3, 2, 3, false, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 1, 3, 1, 1, 1, 3}, 7, 4, 3, 2, 3, false, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 1, 1, 1, 1, 1, 3}, 7, 4, 3, 2, 3, false, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 1, 1, 1, 1, 1, 1, 1}, 7, 4, 3, 2, 3, true, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 1, 3, 1, 1, 1, 3}, 7, 4, 3, 2, 3, true, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any}, - gemm_test_params{{1, 3, 1, 1, 1, 1, 1, 3}, 7, 4, 3, 2, 3, true, true, 1, MKLDNNPlugin::impl_desc_type::gemm_any} - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp deleted file mode 100644 index 
14df8044d0f..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp +++ /dev/null @@ -1,471 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct input_test_params { - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - -class MKLDNNGraphInputTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - 1 - 3 - 3 - 3 - - - - - - - 1 - 3 - 3 - 3 - - - - - - - 1 - 3 - - - - - - - - 1 - 3 - 3 - 3 - - - - - 1 - 3 - 3 - 3 - - - - - - - - 1 - 3 - 3 - 3 - - - - - 1 - 3 - 3 - 3 - - - - - - - - 1 - 3 - - - - - 1 - 3 - - - - - - - - - - -)V0G0N"; - - std::string getModel(input_test_params p) { - return model_t; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - input_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Input || nodes[i]->getType() == MKLDNNPlugin::Output) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - size_t count = (nodes[i]->getType() == MKLDNNPlugin::Input) ? 0 : 2; - if (nodes[i]->getName() == "in3") { - count = 1; - } - if (nodes[i]->getName() == "out_power3") { - count = 3; - } - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(count)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType()); - } - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphInputTests, TestsInput) {} - - -INSTANTIATE_TEST_CASE_P( - TestsInput, MKLDNNGraphInputTests, - ::testing::Values( - input_test_params{1, MKLDNNPlugin::impl_desc_type::unknown, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(0, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(0, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().outConfs.at(0).desc.getLayout()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(0, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, 
impl.getConfig().inConfs.size()); - ASSERT_EQ(0, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().inConfs.at(0).desc.getLayout()); - } - } } - )); - -class MKLDNNGraphConstInputTests: public TestsCommon { - std::string model_t = R"V0G0N( - - - - - - 1 - 3 - 2 - 2 - - - - - - - - - - 1 - 3 - 1 - 2 - - - - - - - - - - - 1 - 3 - 2 - 2 - - - 1 - 3 - 1 - 2 - - - - - 1 - 3 - 3 - 2 - - - - - - - - - -)V0G0N"; - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - std::string model = model_t; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {72}, InferenceEngine::C }); - weights->allocate(); - float * data = weights->buffer(); - - std::cout << weights->size() << std::endl; - - InferenceEngine::SizeVector dims_src1 = {1, 3, 2, 2}; - InferenceEngine::SizeVector dims_src2 = {1, 3, 1, 2}; - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::NCHW}); - src1->allocate(); - float *srcData = src1->buffer(); - for (size_t i = 0; i < 12; i++, data++, srcData++) { - *data = 1; - *srcData = 1; - } - - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::NCHW}); - src2->allocate(); - srcData = src2->buffer(); - for (size_t i = 0; i < 6; i++, data++, srcData++) { - *data = 2; - *srcData = 2; - } - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - ASSERT_LE(3, nodes.size()); - - InferenceEngine::BlobMap srcs; - srcs["in1"] = src1; - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - // Compare - float *src1_ptr = src1->buffer(); - size_t src1_size = src1->size(); - float *src2_ptr = src2->buffer(); - size_t src2_size = src2->size(); - float *dst_ptr = output->buffer(); - size_t dst_size = output->size(); - - int len1 = 1, len2 = 1, cycles; - for (int dim = 2; dim < output->getTensorDesc().getDims().size(); dim++) { - len1 *= src1->getTensorDesc().getDims()[dim]; - len2 *= src2->getTensorDesc().getDims()[dim]; - } - cycles = 2; - - int index1 = 0, index2 = 0, index = 0; - for (int cycle = 0; cycle < cycles; cycle ++) { - for (int i1 = 0; i1 < len1; i1++) { - if (src1_ptr[index1] != dst_ptr[index]) - { - FAIL() << "index: " << index << " src: " << src1_ptr[index1] << ", dst: " << dst_ptr[index]; - } - index1++; index++; - } - for (int i2 = 0; i2 < len2; i2++) { - if (src2_ptr[index2] != dst_ptr[index]) - { - FAIL() << "index: " << index << " src: " << src2_ptr[index2] << ", dst: " << dst_ptr[index]; - } - index2++; index++; - } - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_F(MKLDNNGraphConstInputTests, TestsConstInput) {} - - -struct input_layout_test_params { - InferenceEngine::Layout layout; - std::vector reference; - MKLDNNPlugin::impl_desc_type selectedType; - std::vector> comp; -}; - -class 
MKLDNNGraphInputLayoutTest : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - - - - - - - - - - - - -)V0G0N"; - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - input_layout_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = model_t; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - network.getInputsInfo().begin()->second->setLayout(p.layout); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, { 1, 3, 2, 2 }, p.layout); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(desc); - src->allocate(); - fill_data_dbgval(src->buffer(), src->size()); - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - std::pair item = *out.begin(); - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - InferenceEngine::BlobMap outputBlobs; - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - // Check results - if (memcmp((*output).data(), &p.reference[0], output->byteSize()) != 0) - FAIL() << "Wrong result with compare reference!"; - } - catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphInputLayoutTest, TestsLayoutInput) {} - -INSTANTIATE_TEST_CASE_P( - TestsLayoutInput, MKLDNNGraphInputLayoutTest, - ::testing::Values( - input_layout_test_params{ InferenceEngine::NCHW, { 0,1,2,3,3,4,5,6,6,7,8,9 }, MKLDNNPlugin::impl_desc_type::unknown } -// input_layout_test_params{ InferenceEngine::NHWC, { 0,0,0,3,3,3,6,6,6,9,9,9 }, MKLDNNPlugin::impl_desc_type::unknown } -)); - diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp deleted file mode 100644 index 9c57834cbd1..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp +++ /dev/null @@ -1,271 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include -#include "mkldnn_exec_network.h" -#include -#include -#include - -using namespace std; -using namespace mkldnn; - -class MKLDNNTestExecNetwork: public MKLDNNPlugin::MKLDNNExecNetwork { -public: - MKLDNNPlugin::MKLDNNGraph& getGraph() { - return _graphs.front(); - } -}; - -static MKLDNNPlugin::MKLDNNGraph& getGraph(InferenceEngine::IExecutableNetworkInternal::Ptr execNetwork) { - return static_cast(execNetwork.get())->getGraph(); -} - -class MKLDNNGraphLeaksTests: public ::testing::Test { -protected: - void addOutputToEachNode(InferenceEngine::CNNNetwork& network, std::vector& new_outputs, - InferenceEngine::CNNLayerPtr cnnLayer) { - auto outputs = network.getOutputsInfo(); - if (outputs.find(cnnLayer->name) != outputs.end()) - return; - - network.addOutput(cnnLayer->name); - new_outputs.push_back(cnnLayer->name); - - for (const auto &layer : cnnLayer->outData) { - for (const auto &data : getInputTo(layer)) { - addOutputToEachNode(network, new_outputs, data.second); - } - } - } - 
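// Descriptive note (not part of the original file): the helper below fills a float buffer
// with sin(i), zeroing out every other run of duty_ratio consecutive elements, so the test
// input contains both zero and non-zero regions.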
- void fill_data(float *data, size_t size, size_t duty_ratio = 10) { - for (size_t i = 0; i < size; i++) { - if ( ( i / duty_ratio)%2 == 1) { - data[i] = 0.0; - } else { - data[i] = (float) sin((float)i); - } - } - } -}; - -TEST_F(MKLDNNGraphLeaksTests, MKLDNN_not_release_outputs_fp32) { - try { - std::string model = "\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 1\n" - " 28\n" - " 28\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 1\n" - " 28\n" - " 28\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 20\n" - " 24\n" - " 24\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 20\n" - " 24\n" - " 24\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 20\n" - " 12\n" - " 12\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 20\n" - " 12\n" - " 12\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 50\n" - " 8\n" - " 8\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 50\n" - " 8\n" - " 8\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 50\n" - " 4\n" - " 4\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 50\n" - " 4\n" - " 4\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 500\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 500\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 500\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 500\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 10\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 10\n" - " \n" - " \n" - " \n" - " \n" - " 1\n" - " 10\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - ""; - - size_t weights_size = 1724320; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {weights_size}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - auto outputs = network.getOutputsInfo(); - std::vector new_outputs; - - for (auto input : network.getInputsInfo()) { - for (const auto &layer : getInputTo(input.second->getInputData())) { - addOutputToEachNode(network, new_outputs, layer.second); - } - } - - ASSERT_NE(1, network.getOutputsInfo().size()); - - std::shared_ptr score_engine(new MKLDNNPlugin::Engine()); - InferenceEngine::IExecutableNetworkInternal::Ptr exeNetwork1; - ASSERT_NO_THROW(exeNetwork1 = score_engine->LoadNetwork(network, {})); - - size_t modified_outputs_size = getGraph(exeNetwork1).GetOutputNodes().size(); - - InferenceEngine::CNNNetwork network2; - ASSERT_NO_THROW(network2 = core.ReadNetwork(model, weights_ptr)); - ASSERT_EQ(1, network2.getOutputsInfo().size()); - - InferenceEngine::IExecutableNetworkInternal::Ptr exeNetwork2; - ASSERT_NO_THROW(exeNetwork2 = score_engine->LoadNetwork(network2, {})); - - size_t original_outputs_size = getGraph(exeNetwork2).GetOutputNodes().size(); - - ASSERT_NE(modified_outputs_size, original_outputs_size); - ASSERT_EQ(1, original_outputs_size); - } catch (std::exception& e) { - FAIL() << e.what(); - } catch (...) 
{ - FAIL(); - } -} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp deleted file mode 100644 index 6612f887585..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp +++ /dev/null @@ -1,301 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct lrn_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in; - - size_t local_size; - float alpha; - float beta; - size_t k; - - size_t num_prim_desc; - - int selectedType; - - std::vector> comp; -}; - -template -void ref_lrn(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst, lrn_test_params prm) -{ - size_t IW = prm.in.w; - size_t IH = prm.in.h; - size_t IC = prm.in.c; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (uint32_t c = 0; c < IC; c++) { - for (uint32_t h = 0; h < IH; h++) { - for (uint32_t w = 0; w < IW; w++) { - uint32_t oidx = c * IH * IW - + h * IW + w; - - uint32_t sz = prm.local_size; - int32_t c_start = c - sz / 2; - int32_t c_end = c_start + sz; - if (c_start < 0) c_start = 0; - if (c_end > (int32_t)IC) c_end = IC; - data_t sum = 0.0; - for (int32_t c1 = c_start; c1 < c_end; c1++) { - uint32_t idx = c1 * IH * IW + h * IW + w; - data_t s = src_data[idx]; - - sum += s * s; - } - - data_t norm_coef = powf(1. + prm.alpha * sum / sz, -prm.beta); - dst_data[oidx] = norm_coef * src_data[oidx]; - } - } - } -} - -class MKLDNNGraphLrnTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(lrn_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - - REPLACE_WITH_NUM(model, "_LS_", p.local_size); - REPLACE_WITH_NUM(model, "_A_", p.alpha); - REPLACE_WITH_NUM(model, "_B_", p.beta); - REPLACE_WITH_NUM(model, "_K_", p.k); - - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - lrn_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Lrn) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, - nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - if (nodes.size() != 3 && nodes.size() != 5) - FAIL() << "Nodes 
amount should be 3 or 5 (in reorder case)"; - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_lrn(*srcPtr, dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphLrnTests, TestsLrn) {} - -INSTANTIATE_TEST_CASE_P( - TestsLrn, MKLDNNGraphLrnTests, - ::testing::Values( - lrn_test_params{ - {1, 3, 228, 228}, - 5, 0.0001f, 0.75f, 1, 3, MKLDNNPlugin::impl_desc_type::ref_any, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref_any, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref_any, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref_any, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - }}, - lrn_test_params{{1, 16, 228, 228}, 5, 0.0001f, 0.75f, 1, 3, MKLDNNPlugin::impl_desc_type::jit})); - -class MKLDNNGraphDynBatchLrnTests: public MKLDNNGraphLrnTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - lrn_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.in.n; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - 
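// Descriptive note (not part of the original file): KEY_DYN_BATCH_ENABLED=YES turns on
// dynamic batching in the CPU plugin, so the graph created below can presumably be executed
// with a batch smaller than the reshaped maximum; the two checkDynBatch calls at the end of
// SetUp re-run inference at batch MB and at batch 1 and validate the LRN node in both cases.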
graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {MB, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkLRN = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Lrn; - }; - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkLRN); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkLRN); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchLrnTests, TestsDynBatchLrn) {} - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsDynBatchLrn, MKLDNNGraphDynBatchLrnTests, - ::testing::Values( - lrn_test_params{{1, 3, 228, 228}, 5, 0.0001f, 0.75f, 1, 3, MKLDNNPlugin::impl_desc_type::ref_any}, - lrn_test_params{{1, 16, 228, 228}, 5, 0.0001f, 0.75f, 1, 3, MKLDNNPlugin::impl_desc_type::jit})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp deleted file mode 100644 index f6ea2b49901..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp +++ /dev/null @@ -1,635 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; -using namespace InferenceEngine; -using namespace Extensions; -using namespace ::Cpu; - -namespace { - -OV_CC_DOMAINS(GraphPermuteTests); - -} // namespace - -struct permute_test_params { - Layout layout_in, layout_out; - Precision precision; - size_t num_prim_desc; - - SizeVector dims; - SizeVector permute_order; - SizeVector block_dims_in; - SizeVector block_order_in; - SizeVector block_dims_out; - SizeVector block_order_out; -}; - -class FakeLayerImpl_permute: public Cpu::ExtLayerBase, - public WithParamInterface { -public: - explicit FakeLayerImpl_permute(const CNNLayer* layer) { - try { - layout = static_cast(layer->GetParamAsUInt("layout")); - block_dims = layer->GetParamAsInts("block_dims"); - order = layer->GetParamAsInts("order"); - addConfig(layer); - } catch (InferenceEngine::Exception &ex) { - errorMsg = ex.what(); - } - } - - Layout layout; - std::vector block_dims; - std::vector order; - - void addConfig(const CNNLayer* layer) { - LayerConfig config; - - // Fill tensor parameters into config - auto fill_port = [&] (std::vector& port, const DataPtr& data) { - if (!data) IE_THROW() << "Cannot get input data!"; - - 
DataConfig dataConfig; - dataConfig.inPlace = 0; - dataConfig.constant = false; - - const TensorDesc& data_desc = data->getTensorDesc(); - const SizeVector& data_dims = data_desc.getDims(); - - InferenceEngine::Precision precision = data_desc.getPrecision(); - if (block_dims.empty()) { - dataConfig.desc = TensorDesc(precision, data_dims, layout); - } else { - SizeVector tmp_block_dims(block_dims.size()); - SizeVector tmp_order(order.size()); - for (size_t i = 0; i < order.size(); i++) { - tmp_block_dims[i] = block_dims[i]; - tmp_order[i] = order[i]; - } - dataConfig.desc = TensorDesc(precision, data_dims, {tmp_block_dims, tmp_order}); - } - - port.push_back(dataConfig); - }; - - fill_port(config.inConfs, layer->insData[0].lock()); - fill_port(config.outConfs, layer->outData[0]); - config.outConfs[0].desc.setPrecision(config.inConfs[0].desc.getPrecision()); - confs.push_back(config); - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, - ResponseDesc *resp) noexcept override { - return OK; - } -}; - -static std::string precToStr (Precision prec) { - return prec == Precision::I8 ? "I8" : "FP32"; -} - -template -static void fill_int_data(data_t *data, size_t size) { - for (size_t i = 0 ; i < size; i++) { - data[i] = i * 13 % 21 - 10; - } -} - -template -static void ref_permute(const TBlob &src, TBlob &dst, permute_test_params prm) { - const data_t *src_data = src.readOnly(); - float *dst_data = dst.data(); - - SizeVector orderedDims; - for (auto ord : prm.permute_order) { - orderedDims.push_back(src.getTensorDesc().getDims()[ord]); - } - TensorDesc desc(Precision::FP32, src.getTensorDesc().getDims(), {orderedDims, prm.permute_order}); - - for (int i=0; i < src.size(); i++) { - dst_data[desc.offset(i)] = src_data[src.getTensorDesc().offset(i)]; - } -} - -typedef std::tuple test_params_t; - -template -class MKLDNNGraphPermuteTests: public TestsCommon, -public WithParamInterface { - std::string model_t = (std::string) R"V0G0N( - - - - - - __DIMS__ - - - - - - - - __DIMS__ - - - - - __DIMS__ - - - - - - - - __DIMS__ - - - - - __DST_DIMS__ - - - - - - - - __DST_DIMS__ - - - - - __DST_DIMS__ - - - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(permute_test_params p) { - std::string model = model_t; - std::string dims; - std::string dst_dims; - for (auto& dim : p.dims) { - dims += ""; - dims += std::to_string(dim) + "\n"; - } - - std::string order; - for (auto& ord : p.permute_order) { - if (!order.empty()) - order += ","; - order += std::to_string(ord); - dst_dims += ""; - dst_dims += std::to_string(p.dims[ord]) + "\n"; - } - - REPLACE_WITH_STR(model, "__DIMS__", dims); - REPLACE_WITH_STR(model, "__DST_DIMS__", dst_dims); - REPLACE_WITH_STR(model, "_PERMUTE_ORDER_", order); - REPLACE_WITH_STR(model, "_PREC_", precToStr(p.precision)); - REPLACE_WITH_NUM(model, "_LAYOUT_IN_", (unsigned int)p.layout_in); - REPLACE_WITH_NUM(model, "_LAYOUT_OUT_", (unsigned int)p.layout_out); - - REPLACE_WITH_NUM_VECTOR(model, "_BLOCK_DIMS_IN_", p.block_dims_in); - REPLACE_WITH_NUM_VECTOR(model, "_BLOCK_ORDER_IN_", p.block_order_in); - REPLACE_WITH_NUM_VECTOR(model, "_BLOCK_DIMS_OUT_", p.block_dims_out); - REPLACE_WITH_NUM_VECTOR(model, "_BLOCK_ORDER_OUT_", p.block_order_out); - - return model; - } - - virtual permute_test_params initialize_permute_test_params() { - auto test_params = GetParam(); - permute_test_params p; - - p.layout_in = std::get<0>(test_params); - p.layout_out = std::get<1>(test_params); - p.precision = std::get<2>(test_params); - p.num_prim_desc = 
std::get<3>(test_params); - p.dims = std::get<4>(test_params); - p.permute_order = std::get<5>(test_params); - p.block_dims_in = std::get<6>(test_params); - p.block_order_in = std::get<7>(test_params); - p.block_dims_out = std::get<8>(test_params); - p.block_order_out = std::get<9>(test_params); - - return p; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - permute_test_params p = initialize_permute_test_params(); - std::string model = getModel(p); - - Core core; - CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - auto manager = std::make_shared(); - { - auto defaultExt = std::make_shared(); - defaultExt->layersFactory.registerNodeIfRequired(GraphPermuteTests, FakeLayer_permute, "FakeLayer_permute", Cpu::ImplFactory); - manager->AddExtension(defaultExt); - } - graph.CreateGraph(network, manager); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Permute) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - } - } - - Blob::Ptr src = make_shared_blob({p.precision, p.dims, TensorDesc::getLayoutByDims(p.dims)}); - src->allocate(); - if (typeid(src_data_t) == typeid(int8_t)) { - fill_int_data(src->buffer().as(), src->size()); - } else { - fill_data(src->buffer(), src->size()); - } - - auto* srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - OutputsDataMap out; - out = network.getOutputsInfo(); - BlobMap outputBlobs; - - auto item = *out.begin(); - - TBlob::Ptr output; - output = make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - TensorDesc td(Precision::FP32, p.dims, TensorDesc::getLayoutByDims(p.dims)); - TBlob dst_ref(td); - dst_ref.allocate(); - - ref_permute(*srcPtr, dst_ref, p); - - compare(*output, dst_ref); - } catch (const Exception &e) { - FAIL() << e.what(); - } - } -}; - -using permute_f32 = MKLDNNGraphPermuteTests; -using permute_s8 = MKLDNNGraphPermuteTests; - -TEST_P(permute_f32, TestsPermute) {} -TEST_P(permute_s8, TestsPermute) {} - -#define test_cases_planar_4d(prec) ::testing::Combine( \ - ::testing::Values(Layout::NCHW, Layout::NHWC), \ - ::testing::Values(Layout::NCHW, Layout::NHWC), \ - ::testing::Values(prec), \ - ::testing::Values(2), \ - ::testing::Values(SizeVector({2, 3, 4, 5})), \ - ::testing::Values(SizeVector({0, 1, 2, 3}), SizeVector({0, 2, 3, 1}), \ - SizeVector({0, 2, 1, 3}), SizeVector({0, 1, 3, 2}), \ - SizeVector({1, 0, 2, 3})), \ - ::testing::Values(SizeVector({})), \ - ::testing::Values(SizeVector({})), \ - ::testing::Values(SizeVector({})), \ - ::testing::Values(SizeVector({})) \ -) - -#define test_cases_planar_5d(prec) ::testing::Combine( \ - ::testing::Values(Layout::NCDHW, Layout::NDHWC), \ - ::testing::Values(Layout::NCDHW, Layout::NDHWC), \ - ::testing::Values(prec), \ - ::testing::Values(2), \ - ::testing::Values(SizeVector({2, 3, 4, 5, 6})), \ - ::testing::Values(SizeVector({0, 1, 2, 3, 4}), SizeVector({0, 4, 2, 1, 3}), \ - SizeVector({0, 2, 4, 3, 1}), SizeVector({0, 3, 2, 4, 1}), \ - SizeVector({0, 3, 1, 4, 2}), SizeVector({1, 0, 2, 3, 4})), \ - ::testing::Values(SizeVector({})), \ - ::testing::Values(SizeVector({})), \ - 
::testing::Values(SizeVector({})), \ - ::testing::Values(SizeVector({})) \ -) - -#define case_planar_0(prec) test_params_t(Layout::NC, Layout::NC, prec, 1, {20, 3}, {0, 1}, {}, {}, {}, {}) -#define case_planar_1(prec) test_params_t(Layout::CHW, Layout::CHW, prec, 1, {20, 30, 4}, {0, 1, 2}, {}, {}, {}, {}) -#define case_planar_2(prec) test_params_t(Layout::CHW, Layout::CHW, prec, 1, {20, 30, 4}, {0, 2, 1}, {}, {}, {}, {}) -#define case_planar_3(prec) test_params_t(Layout::CHW, Layout::CHW, prec, 1, {2, 12, 9}, {0, 2, 1}, {}, {}, {}, {}) -#define case_planar_4(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 1, {2, 80, 2, 2, 4, 5}, {0, 1, 4, 2, 5, 3}, {}, {}, {}, {}) -#define case_planar_5(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 1, {2, 8, 30, 3, 4, 5}, {0, 1, 4, 2, 5, 3}, {}, {}, {}, {}) -#define case_planar_6(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 1, {2, 8, 3, 30, 4, 5}, {0, 3, 4, 1, 5, 2}, {}, {}, {}, {}) - -#define case_blocked_0(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 1, 2, 3}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}) -#define case_blocked_1(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 2, 3, 1}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {2, 2, 20, 32, 8}, {0, 1, 2, 3, 1}) -#define case_blocked_2(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 2, 1, 3}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {2, 2, 32, 20, 8}, {0, 1, 2, 3, 1}) -#define case_blocked_3(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 1, 3, 2}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {2, 4, 20, 10, 8}, {0, 1, 2, 3, 1}) -#define case_blocked_4(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 3, {10, 24, 4, 5}, {1, 0, 2, 3}, \ -{10, 3, 4, 5, 8}, {0, 1, 2, 3, 1}, {24, 2, 4, 5, 8}, {0, 1, 2, 3, 1}) -#define case_blocked_5(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 5, 10, 20}, {0, 1, 2, 3, 4}, \ -{2, 4, 5, 10, 20, 8}, {0, 1, 2, 3, 4, 1}, {2, 4, 5, 10, 20, 8}, {0, 1, 2, 3, 4, 1}) -#define case_blocked_6(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 5, 10, 20}, {0, 4, 2, 1, 3}, \ -{2, 4, 5, 10, 20, 8}, {0, 1, 2, 3, 4, 1}, {2, 3, 5, 32, 10, 8}, {0, 1, 2, 3, 4, 1}) -#define case_blocked_7(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 5, 10, 20}, {0, 2, 4, 3, 1}, \ -{2, 4, 5, 10, 20, 8}, {0, 1, 2, 3, 4, 1}, {2, 1, 20, 10, 32, 8}, {0, 1, 2, 3, 4, 1}) -#define case_blocked_8(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 5, 10, 20}, {0, 3, 2, 4, 1}, \ -{2, 4, 5, 10, 20, 8}, {0, 1, 2, 3, 4, 1}, {2, 2, 5, 20, 32, 8}, {0, 1, 2, 3, 4, 1}) -#define case_blocked_9(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 4, {2, 32, 5, 10, 20}, {0, 3, 1, 4, 2}, \ -{2, 4, 5, 10, 20, 8}, {0, 1, 2, 3, 4, 1}, {2, 2, 32, 20, 5, 8}, {0, 1, 2, 3, 4, 1}) -#define case_blocked_10(prec) test_params_t(Layout::BLOCKED, Layout::BLOCKED, prec, 3, {10, 24, 4, 5, 6}, {1, 0, 2, 3, 4}, \ -{10, 3, 4, 5, 6, 8}, {0, 1, 2, 3, 4, 1}, {24, 2, 4, 5, 6, 8}, {0, 1, 2, 3, 4, 1}) - -#define case_planar_to_blocked_0(prec) test_params_t(Layout::NCHW, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 1, 2, 3}, \ -{}, {}, {2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}) -#define case_planar_to_blocked_1(prec) test_params_t(Layout::NCHW, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 2, 3, 1}, \ -{}, {}, {2, 2, 20, 32, 8}, {0, 1, 2, 3, 1}) -#define 
case_planar_to_blocked_2(prec) test_params_t(Layout::NCHW, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 2, 1, 3}, \ -{}, {}, {2, 2, 32, 20, 8}, {0, 1, 2, 3, 1}) -#define case_planar_to_blocked_3(prec) test_params_t(Layout::NCHW, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 1, 3, 2}, \ -{}, {}, {2, 4, 20, 10, 8}, {0, 1, 2, 3, 1}) -#define case_planar_to_blocked_4(prec) test_params_t(Layout::NCHW, Layout::BLOCKED, prec, 3, {10, 24, 4, 5}, {1, 0, 2, 3}, \ -{}, {}, {24, 2, 4, 5, 8}, {0, 1, 2, 3, 1}) -#define case_planar_to_blocked_5(prec) test_params_t(Layout::NHWC, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 1, 2, 3}, \ -{}, {}, {2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}) -#define case_planar_to_blocked_6(prec) test_params_t(Layout::NHWC, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 2, 3, 1}, \ -{}, {}, {2, 2, 20, 32, 8}, {0, 1, 2, 3, 1}) -#define case_planar_to_blocked_7(prec) test_params_t(Layout::NHWC, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 2, 1, 3}, \ -{}, {}, {2, 2, 32, 20, 8}, {0, 1, 2, 3, 1}) -#define case_planar_to_blocked_8(prec) test_params_t(Layout::NHWC, Layout::BLOCKED, prec, 4, {2, 32, 10, 20}, {0, 1, 3, 2}, \ -{}, {}, {2, 4, 20, 10, 8}, {0, 1, 2, 3, 1}) -#define case_planar_to_blocked_9(prec) test_params_t(Layout::NHWC, Layout::BLOCKED, prec, 3, {10, 24, 4, 5}, {1, 0, 2, 3}, \ -{}, {}, {24, 2, 4, 5, 8}, {0, 1, 2, 3, 1}) - -#define case_blocked_to_planar_0(prec) test_params_t(Layout::BLOCKED, Layout::NCHW, prec, 4, {2, 32, 10, 20}, {0, 1, 2, 3}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_1(prec) test_params_t(Layout::BLOCKED, Layout::NCHW, prec, 4, {2, 32, 10, 20}, {0, 2, 3, 1}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_2(prec) test_params_t(Layout::BLOCKED, Layout::NCHW, prec, 4, {2, 32, 10, 20}, {0, 2, 1, 3}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_3(prec) test_params_t(Layout::BLOCKED, Layout::NCHW, prec, 4, {2, 32, 10, 20}, {0, 1, 3, 2}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_4(prec) test_params_t(Layout::BLOCKED, Layout::NCHW, prec, 3, {10, 24, 4, 5}, {1, 0, 2, 3}, \ -{10, 3, 4, 5, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_5(prec) test_params_t(Layout::BLOCKED, Layout::NHWC, prec, 4, {2, 32, 10, 20}, {0, 1, 2, 3}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_6(prec) test_params_t(Layout::BLOCKED, Layout::NHWC, prec, 4, {2, 32, 10, 20}, {0, 2, 3, 1}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_7(prec) test_params_t(Layout::BLOCKED, Layout::NHWC, prec, 4, {2, 32, 10, 20}, {0, 2, 1, 3}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_8(prec) test_params_t(Layout::BLOCKED, Layout::NHWC, prec, 4, {2, 32, 10, 20}, {0, 1, 3, 2}, \ -{2, 4, 10, 20, 8}, {0, 1, 2, 3, 1}, {}, {}) -#define case_blocked_to_planar_9(prec) test_params_t(Layout::BLOCKED, Layout::NHWC, prec, 3, {10, 24, 4, 5}, {1, 0, 2, 3}, \ -{10, 3, 4, 5, 8}, {0, 1, 2, 3, 1}, {}, {}) - -test_params_t test_cases_fp32[] = { - case_planar_0(Precision::FP32), - case_planar_1(Precision::FP32), - case_planar_2(Precision::FP32), - case_planar_3(Precision::FP32), - case_planar_4(Precision::FP32), - case_planar_5(Precision::FP32), - case_planar_6(Precision::FP32), -}; - -test_params_t test_cases_s8[] = { - case_planar_0(Precision::I8), - case_planar_1(Precision::I8), - case_planar_2(Precision::I8), - case_planar_3(Precision::I8), - 
case_planar_4(Precision::I8), - case_planar_5(Precision::I8), - case_planar_6(Precision::I8), -}; - -test_params_t test_cases_blocked_fp32[] = { - case_blocked_0(Precision::FP32), - case_blocked_1(Precision::FP32), - case_blocked_2(Precision::FP32), - case_blocked_3(Precision::FP32), - case_blocked_4(Precision::FP32), - case_blocked_5(Precision::FP32), - case_blocked_6(Precision::FP32), - case_blocked_7(Precision::FP32), - case_blocked_8(Precision::FP32), - case_blocked_9(Precision::FP32), - case_blocked_10(Precision::FP32), -}; - -test_params_t test_cases_blocked_s8[] = { - case_blocked_0(Precision::I8), - case_blocked_1(Precision::I8), - case_blocked_2(Precision::I8), - case_blocked_3(Precision::I8), - case_blocked_4(Precision::I8), - case_blocked_5(Precision::I8), - case_blocked_6(Precision::I8), - case_blocked_7(Precision::I8), - case_blocked_8(Precision::I8), - case_blocked_9(Precision::I8), - case_blocked_10(Precision::I8), -}; - -test_params_t test_cases_planar_to_blocked_fp32[] = { - case_planar_to_blocked_0(Precision::FP32), - case_planar_to_blocked_1(Precision::FP32), - case_planar_to_blocked_2(Precision::FP32), - case_planar_to_blocked_3(Precision::FP32), - case_planar_to_blocked_4(Precision::FP32), - case_planar_to_blocked_5(Precision::FP32), - case_planar_to_blocked_6(Precision::FP32), - case_planar_to_blocked_7(Precision::FP32), - case_planar_to_blocked_8(Precision::FP32), - case_planar_to_blocked_9(Precision::FP32), -}; - -test_params_t test_cases_blocked_to_planar_fp32[] = { - case_blocked_to_planar_0(Precision::FP32), - case_blocked_to_planar_1(Precision::FP32), - case_blocked_to_planar_2(Precision::FP32), - case_blocked_to_planar_3(Precision::FP32), - case_blocked_to_planar_4(Precision::FP32), - case_blocked_to_planar_5(Precision::FP32), - case_blocked_to_planar_6(Precision::FP32), - case_blocked_to_planar_7(Precision::FP32), - case_blocked_to_planar_8(Precision::FP32), - case_blocked_to_planar_9(Precision::FP32), -}; - -test_params_t test_cases_planar_to_blocked_s8[] = { - case_planar_to_blocked_0(Precision::I8), - case_planar_to_blocked_1(Precision::I8), - case_planar_to_blocked_2(Precision::I8), - case_planar_to_blocked_3(Precision::I8), - case_planar_to_blocked_4(Precision::I8), - case_planar_to_blocked_5(Precision::I8), - case_planar_to_blocked_6(Precision::I8), - case_planar_to_blocked_7(Precision::I8), - case_planar_to_blocked_8(Precision::I8), - case_planar_to_blocked_9(Precision::I8), -}; - -test_params_t test_cases_blocked_to_planar_s8[] = { - case_blocked_to_planar_0(Precision::I8), - case_blocked_to_planar_1(Precision::I8), - case_blocked_to_planar_2(Precision::I8), - case_blocked_to_planar_3(Precision::I8), - case_blocked_to_planar_4(Precision::I8), - case_blocked_to_planar_5(Precision::I8), - case_blocked_to_planar_6(Precision::I8), - case_blocked_to_planar_7(Precision::I8), - case_blocked_to_planar_8(Precision::I8), - case_blocked_to_planar_9(Precision::I8), -}; - - -INSTANTIATE_TEST_CASE_P(TestsPermutePlanar4d, permute_f32, test_cases_planar_4d(Precision::FP32)); -INSTANTIATE_TEST_CASE_P(TestsPermutePlanar5d, permute_f32, test_cases_planar_5d(Precision::FP32)); -INSTANTIATE_TEST_CASE_P(TestsPermute, permute_f32, ::testing::ValuesIn(test_cases_fp32)); -INSTANTIATE_TEST_CASE_P(TestsPermuteBlocked, permute_f32, ::testing::ValuesIn(test_cases_blocked_fp32)); -INSTANTIATE_TEST_CASE_P(TestsPermutePlanarToBlocked, permute_f32, ::testing::ValuesIn(test_cases_planar_to_blocked_fp32)); -INSTANTIATE_TEST_CASE_P(TestsPermuteBlockedToPlanar, permute_f32, 
::testing::ValuesIn(test_cases_blocked_to_planar_fp32)); - -INSTANTIATE_TEST_CASE_P(TestsPermutePlanar4d, permute_s8, test_cases_planar_4d(Precision::I8)); -INSTANTIATE_TEST_CASE_P(TestsPermutePlanar5d, permute_s8, test_cases_planar_5d(Precision::I8)); -INSTANTIATE_TEST_CASE_P(TestsPermute, permute_s8, ::testing::ValuesIn(test_cases_s8)); -INSTANTIATE_TEST_CASE_P(TestsPermuteBlocked, permute_s8, ::testing::ValuesIn(test_cases_blocked_s8)); -INSTANTIATE_TEST_CASE_P(TestsPermutePlanarToBlocked, permute_s8, ::testing::ValuesIn(test_cases_planar_to_blocked_s8)); -INSTANTIATE_TEST_CASE_P(TestsPermuteBlockedToPlanar, permute_s8, ::testing::ValuesIn(test_cases_blocked_to_planar_s8)); - -class MKLDNNGraphDynBatchPermuteTests: public permute_f32 { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - permute_test_params p = initialize_permute_test_params(); - std::string model = getModel(p); - size_t MB = p.dims[0]; - if (MB < 2) - MB = 2; - p.dims[0] = MB; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - auto manager = std::make_shared(); - { - auto defaultExt = std::make_shared(); - defaultExt->layersFactory.registerNodeIfRequired(GraphPermuteTests, FakeLayer_permute, "FakeLayer_permute", Cpu::ImplFactory); - manager->AddExtension(defaultExt); - } - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network, manager); - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, p.dims, InferenceEngine::TensorDesc::getLayoutByDims(p.dims)}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkPermute = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Permute; - }; - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkPermute); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkPermute); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchPermuteTests, TestsDynBatchPermute) {} - -test_params_t test_cases_dyn_batch[] = { - test_params_t(Layout::NCHW, Layout::NCHW, Precision::FP32, 2, {2, 3, 4, 5}, {0, 1, 2, 3}, {}, {}, {}, {}), - test_params_t(Layout::NCHW, Layout::NCHW, Precision::FP32, 2, {2, 3, 4, 5}, {0, 2, 3, 1}, {}, {}, {}, {}), - test_params_t(Layout::NCHW, Layout::NCHW, Precision::FP32, 2, {2, 3, 4, 5}, {0, 2, 1, 3}, {}, {}, {}, {}), - test_params_t(Layout::CHW, Layout::CHW, Precision::FP32, 2, {2, 3, 4}, {0, 1, 2}, {}, {}, {}, {}), - test_params_t(Layout::CHW, Layout::CHW, 
Precision::FP32, 2, {2, 3, 4}, {0, 2, 1}, {}, {}, {}, {}), - test_params_t(Layout::NC, Layout::NC, Precision::FP32, 2, {2, 3}, {0, 1}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {2, 3, 4, 5, 6}, {0, 1, 2, 3, 4}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {2, 3, 4, 5, 6}, {0, 4, 2, 1, 3}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {2, 3, 4, 5, 6}, {0, 2, 4, 3, 1}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {2, 3, 4, 5, 6}, {0, 3, 2, 4, 1}, {}, {}, {}, {}), - // FIXME: Plugin inserts reorder from blocked to goidhw format here - // test_params_t(Layout::BLOCKED, Layout::BLOCKED, Precision::FP32, 1, {2, 8, 2, 2, 4, 5}, {0, 1, 4, 2, 5, 3}, {}, {}, {}, {}), - // test_params_t(Layout::BLOCKED, Layout::BLOCKED, Precision::FP32, 1, {2, 8, 3, 3, 4, 5}, {0, 1, 4, 2, 5, 3}, {}, {}, {}, {}), - test_params_t(Layout::CHW, Layout::CHW, Precision::FP32, 2, {2, 12, 9}, {0, 2, 1}, {}, {}, {}, {}), - // test_params_t(Layout::BLOCKED, Layout::BLOCKED, Precision::FP32, 1, {2, 8, 3, 3, 4, 5}, {0, 3, 4, 1, 5, 2}, {}, {}, {}, {}), - test_params_t(Layout::NCHW, Layout::NCHW, Precision::FP32, 2, {2, 3, 4, 5}, {0, 1, 3, 2}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {2, 3, 4, 5, 7}, {0, 3, 1, 4, 2}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {2, 3, 4, 5, 7}, {0, 2, 1, 3, 4}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {2, 3, 4, 5, 7}, {0, 2, 4, 3, 1}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {2, 3, 4, 5, 7}, {0, 4, 2, 3, 1}, {}, {}, {}, {}), - test_params_t(Layout::NCHW, Layout::NCHW, Precision::FP32, 2, {2, 3, 4, 5}, {0, 3, 1, 2}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {3, 4, 7, 8, 4}, {0, 2, 3, 4, 1}, {}, {}, {}, {}), - test_params_t(Layout::NCDHW, Layout::NCDHW, Precision::FP32, 2, {3, 4, 7, 8, 4}, {0, 4, 1, 2, 3}, {}, {}, {}, {}), -}; - -INSTANTIATE_TEST_CASE_P(TestsDynBatchPermute, MKLDNNGraphDynBatchPermuteTests, ::testing::ValuesIn(test_cases_dyn_batch)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp deleted file mode 100644 index 6c248eac720..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp +++ /dev/null @@ -1,504 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#ifndef NOMINMAX -#define NOMINMAX -#endif - -#include "test_graph.hpp" - -#include -#include "single_layer_common.hpp" -#include -#include "tests_common.hpp" -#include "ir_gen_helper.hpp" -#include - -#include - -using namespace InferenceEngine; -using namespace ::testing; -using namespace std; -using namespace mkldnn; -using namespace single_layer_tests; - -struct pooling_test_params { - // Formats: NCHW, NCDHW - vector dims; - // Formats: WH, WHD - vector kernel; - vector strides; - vector pads_begin; - vector pads_end; - - PoolingLayer::PoolType _type; - bool _exclude_pad; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - vector preferTypes; - - vector> comp; -}; - -template -void ref_pool(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst, pooling_test_params prm) -{ - 
int dims_size = prm.dims.size(); - - int KW = prm.kernel[X_AXIS]; - int KH = prm.kernel[Y_AXIS]; - int KD = dims_size == 5 ? prm.kernel[Z_AXIS] : 1; - - int SW = prm.strides[X_AXIS]; - int SH = prm.strides[Y_AXIS]; - int SD = prm.strides.size() > Z_AXIS ? prm.strides[Z_AXIS] : 1; - - int IW = prm.dims[dims_size - 1]; - int IH = prm.dims[dims_size - 2]; - int ID = dims_size == 5 ? prm.dims[dims_size - 3] : 1; - - int PWB = prm.pads_begin[X_AXIS]; - int PHB = prm.pads_begin[Y_AXIS]; - int PDB = prm.pads_begin.size() > Z_AXIS ? prm.pads_begin[Z_AXIS] : 0; - int PWE = prm.pads_end[X_AXIS]; - int PHE = prm.pads_end[Y_AXIS]; - int PDE = prm.pads_end.size() > Z_AXIS ? prm.pads_end[Z_AXIS] : 0; - - int OW = (IW + PWB + PWE - KW) / SW + 1; - int OH = (IH + PHB + PHE - KH) / SH + 1; - int OD = dims_size == 5 ? (ID + PDB + PDE - KD) / SD + 1 : 1; - int OC = prm.dims[1]; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - InferenceEngine::SizeVector dims = dst.getTensorDesc().getDims(); - IE_ASSERT(OC == dims[1]); - - int k1 = OH * OW, - k2 = k1 * OD, - k3 = IH * IW, - k4 = k3 * ID; - - if (prm._type == PoolingLayer::MAX) { - for (int c = 0; c < OC; c++) { - int cc = c * k2; - for (int od = 0; od < OD; od++) { - int cd = cc + od * k1; - for (int oh = 0; oh < OH; oh++) { - int ch = cd + oh * OW; - for (int ow = 0; ow < OW; ow++) { - - int oidx = ch + ow; - data_t out_ref = data_t(0); - bool is_initialized = false; - - for (int kd = 0; kd < KD; kd++) { - int id = dims_size == 5 ? od * SD - PDB + kd : 0lu; - if (id < 0 || id >= ID) continue; - for (int kh = 0; kh < KH; kh++) { - int ih = oh * SH - PHB + kh; - if (ih < 0 || ih >= IH) continue; - for (int kw = 0; kw < KW; kw++) { - int iw = ow * SW - PWB + kw; - if (iw < 0 || iw >= IW) continue; - int iidx = c * k4 - + id * k3 - + ih * IW - + iw; - - data_t d = src_data[iidx]; - if (!is_initialized) { - out_ref = d; - is_initialized = true; - } else { - if (out_ref < d) - out_ref = d; - } - } - } - } - dst_data[oidx] = out_ref; - } - } - } - } - } else if (prm._type == PoolingLayer::AVG) { - - bool include_padding = false; - bool not_zero_l = false; - for (auto lr : prm.pads_begin) { - if (lr) { - not_zero_l = true; - break; - } - } - if (!prm._exclude_pad && not_zero_l) - include_padding = true; - - int PDBKD = KD - PDB, - PHBKH = KH - PHB, - PWBKW = KW - PWB, - IDPDE = ID + PDE, - IHPHE = IH + PHE, - IWPWE = IW + PWE; - - for (int c = 0; c < OC; c++) { - int cc = c * k2; - for (int od = 0; od < OD; od++) { - int cd = cc + od * k1; - int id_start = od * SD - PDB; - int id_end = std::min(od * SD + PDBKD, IDPDE); - for (int oh = 0; oh < OH; oh++) { - int ch = cd + oh * OW; - int ih_start = oh * SH - PHB; - int ih_end = std::min(oh * SH + PHBKH, IHPHE); - for (int ow = 0; ow < OW; ow++) { - size_t oidx = ch + ow; - dst_data[oidx] = (data_t)0; - int iw_start = ow * SW - PWB; - int iw_end = std::min(ow * SW + PWBKW, IWPWE); - - // include_padding - double num_summands = (ih_end - ih_start) * (iw_end - iw_start) * (id_end - id_start); - - id_start = std::max(id_start, 0); - ih_start = std::max(ih_start, 0); - iw_start = std::max(iw_start, 0); - id_end = std::min(id_end, ID); - ih_end = std::min(ih_end, IH); - iw_end = std::min(iw_end, IW); - - if (!include_padding) - num_summands = (id_end - id_start) * (ih_end - ih_start) * (iw_end - iw_start); - if (num_summands == 0.0) continue; - - double dst = 0.0; - for (int id = id_start; id < id_end; ++id) { - for (int ih = ih_start; ih < ih_end; ++ih) { - for (int iw = iw_start; iw < 
iw_end; ++iw) { - size_t iidx = c * k4 - + id * k3 - + ih * IW - + iw; - - dst += (double)src_data[iidx]; - } } } - - dst_data[oidx] = (data_t)(dst / num_summands); - } } } } } -} - -class MKLDNNGraphPoolingTests: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - - - __SRC_DIMS__ - - - - - _IN_ - _IC_ - __DST_DIMS__ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - -)V0G0N"; - -protected: - std::string getModel(pooling_test_params p) { - std::string model = layers_t; - - std::string s_dims; - for (auto& dim : p.dims) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", s_dims); - - s_dims = ""; - int k_len = p.kernel.size(); - for (size_t i = 2lu; i < p.dims.size(); i++) { - size_t inx = k_len - i + 1lu; - size_t dim = (p.dims[i] + p.pads_begin[inx] + p.pads_end[inx] - p.kernel[inx]) / p.strides[inx] + 1lu; - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__DST_DIMS__", s_dims); - - std::string pool_method; - switch (p._type) { - case PoolingLayer::AVG: pool_method = "avg"; - break; - case PoolingLayer::ROI: pool_method = "roi"; - break; - default: pool_method = "max"; - } - REPLACE_WITH_STR(model, "_PM_", pool_method); - - std::string exclude_pad = "false"; - if (p._exclude_pad) exclude_pad = "true"; - REPLACE_WITH_STR(model, "_EP_", exclude_pad); - - REPLACE_WITH_NUM(model, "_IN_", p.dims[0]); - REPLACE_WITH_NUM(model, "_IC_", p.dims[1]); - - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.kernel); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.strides); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.pads_end); - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - - model = IRTemplateGenerator::getIRTemplate("Pooling_Only", p.dims, "FP32", model, edges_t); - - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - pooling_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Pooling) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_TRUE(nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() | p.selectedType); - } - } - - InferenceEngine::Layout layout = ANY; - switch (p.dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, p.dims, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to 
TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_pool(*srcPtr, dst_ref, p); - - compare(*output, dst_ref, 0.0001f); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphPoolingTests, TestsPooling) {} - -INSTANTIATE_TEST_CASE_P( - TestsPooling, MKLDNNGraphPoolingTests, - ::testing::Values( - /*0*/ pooling_test_params{{1, 3, 228, 228}, {2, 2}, {2, 2}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - pooling_test_params{{1, 3, 228, 228}, {4, 2}, {2, 2}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - pooling_test_params{{1, 3, 228, 228}, {4, 2}, {2, 1}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 3, MKLDNNPlugin::impl_desc_type::jit}, - pooling_test_params{{1, 3, 228, 228}, {2, 2}, {2, 2}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 3, MKLDNNPlugin::impl_desc_type::ref, - {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1, 3, 228, 228}, {4, 2}, {2, 2}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 3, MKLDNNPlugin::impl_desc_type::ref, - {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1, 3, 228, 228}, {4, 2}, {2, 1}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 3, MKLDNNPlugin::impl_desc_type::ref, - {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {1u, 0u}, {0u, 0u}, PoolingLayer::AVG, false, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {1u, 0u}, {0u, 0u}, PoolingLayer::AVG, false, 3u, - MKLDNNPlugin::impl_desc_type::jit }, - pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {0u, 0u}, {0u, 0u}, PoolingLayer::AVG, true, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - /*9*/ pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {0u, 0u}, {0u, 0u}, PoolingLayer::AVG, true, 3u, - MKLDNNPlugin::impl_desc_type::jit }, - pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, PoolingLayer::AVG, true, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, PoolingLayer::AVG, false, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, -// pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, PoolingLayer::MAX, false, 3u, -// MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, -// pooling_test_params{{1u, 1u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, PoolingLayer::MAX, false, 1, -// MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // TODO Fix jit implementation. 
End paddings -// pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, {2u, 0u}, PoolingLayer::AVG, true, 3u, -// MKLDNNPlugin::impl_desc_type::jit }, -// pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, {2u, 0u}, PoolingLayer::AVG, false, 3u, -// MKLDNNPlugin::impl_desc_type::jit }, -// pooling_test_params{{1u, 4u, 128u, 128u}, {2u, 2u}, {2u, 2u}, {2u, 2u}, {2u, 0u}, PoolingLayer::MAX, false, 3u, -// MKLDNNPlugin::impl_desc_type::jit }, - - // 5D tensor - pooling_test_params{{1u, 3u, 16u, 32u, 32u}, {2u, 2u, 2u}, {1u, 1u, 1u}, {0u, 0u, 0u}, {0u, 0u, 0u}, PoolingLayer::MAX, false, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 3u, 16u, 32u, 32u}, {2u, 2u, 2u}, {1u, 1u, 1u}, {0u, 0u, 0u}, {0u, 0u, 0u}, PoolingLayer::MAX, false, 3u, - MKLDNNPlugin::impl_desc_type::jit }, - pooling_test_params{{1u, 3u, 16u, 32u, 32u}, {2u, 2u, 2u}, {1u, 1u, 1u}, {1u, 1u, 1u}, {1u, 1u, 1u}, PoolingLayer::MAX, false, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 32u, 60u, 60u, 60u}, {2u, 3u, 4u}, {2u, 2u, 2u}, {1u, 1u, 1u}, {1u, 2u, 3u}, PoolingLayer::MAX, false, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, -// pooling_test_params{{1u, 3u, 16u, 32u, 32u}, {2u, 2u, 2u}, {1u, 1u, 1u}, {1u, 2u, 3u}, {1u, 2u, 3u}, PoolingLayer::MAX, false, 1u, -// MKLDNNPlugin::impl_desc_type::jit }, - pooling_test_params{{1u, 4u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {1u, 0u, 0u}, {0u, 0u, 0u}, PoolingLayer::AVG, false, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 4u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {1u, 0u, 0u}, {0u, 0u, 0u}, PoolingLayer::AVG, false, 3u, - MKLDNNPlugin::impl_desc_type::jit }, - pooling_test_params{{1u, 4u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {0u, 0u, 0u}, {0u, 0u, 0u}, PoolingLayer::AVG, true, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 4u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {0u, 0u, 0u}, {0u, 0u, 0u}, PoolingLayer::AVG, true, 3u, - MKLDNNPlugin::impl_desc_type::jit }, - pooling_test_params{{1u, 4u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {0u, 0u, 0u}, PoolingLayer::AVG, true, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 4u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, PoolingLayer::AVG, true, 3u, - MKLDNNPlugin::impl_desc_type::jit }, - pooling_test_params{{1u, 4u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, PoolingLayer::AVG, false, 3u, - MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1u, 4u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, PoolingLayer::AVG, false, 3u, - MKLDNNPlugin::impl_desc_type::jit }, - pooling_test_params{{1u, 1u, 128u, 128u, 128u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, {2u, 2u, 2u}, PoolingLayer::AVG, false, 1u, - MKLDNNPlugin::impl_desc_type::ref })); - - -class MKLDNNGraphDynBatchPoolingTests: public MKLDNNGraphPoolingTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - pooling_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.dims[0]; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - 
InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - - InferenceEngine::Layout layout = ANY; - switch (p.dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - InferenceEngine::Blob::Ptr src = - InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, p.dims, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkPooling = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Pooling; - }; - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkPooling); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkPooling); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchPoolingTests, TestsDynBatchPooling) {} - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsDynBatchPooling, MKLDNNGraphDynBatchPoolingTests, - ::testing::Values( - pooling_test_params{{1, 3, 228, 228}, {4, 2}, {2, 1}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 4, MKLDNNPlugin::impl_desc_type::jit}, - pooling_test_params{{1, 3, 228, 228}, {2, 2}, {2, 2}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1, 3, 228, 228}, {4, 2}, {2, 2}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - pooling_test_params{{1, 3, 228, 228}, {4, 2}, {2, 1}, {0, 0}, {0, 0}, PoolingLayer::MAX, false, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp deleted file mode 100644 index 0f7535a5a91..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp +++ /dev/null @@ -1,332 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct power_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in; - - float 
power; - float scale; - float shift; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - -template -void ref_power(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst, power_test_params prm) { - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (int i=0; i < src.size(); i++) - dst_data[i] = pow(src_data[i]*prm.scale + prm.shift, prm.power); -} - -class MKLDNNGraphPowerTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(power_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - REPLACE_WITH_NUM(model, "_POWER_", p.power); - REPLACE_WITH_NUM(model, "_SCALE_", p.scale); - REPLACE_WITH_NUM(model, "_SHIFT_", p.shift); - - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - power_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Eltwise) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - } - } - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_power(*srcPtr, dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphPowerTests, TestsPower) {} - - -INSTANTIATE_TEST_CASE_P( - TestsPower, MKLDNNGraphPowerTests, - ::testing::Values( - power_test_params{ - {1, 3, 13, 13}, 1, 2, 0.5f, 3, MKLDNNPlugin::impl_desc_type::unknown, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, 
impl.getConfig().outConfs.size()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - }}}, - power_test_params{{1, 1, 23, 23}, 3, 8, 2, 3 }, - power_test_params{{1, 8, 23, 23}, 8, 2, 1, 3 }, - power_test_params{{1, 8, 23, 23}, 2, 2, 4, 3 } - )); - -class MKLDNNGraphDynBatchPowerTests: public MKLDNNGraphPowerTests { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - -)V0G0N"; - - std::string getModel(power_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - REPLACE_WITH_NUM(model, "_POWER_", p.power); - REPLACE_WITH_NUM(model, "_SCALE_", p.scale); - REPLACE_WITH_NUM(model, "_SHIFT_", p.shift); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - power_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.in.n; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {MB, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkPower = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Eltwise; - }; - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkPower); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkPower); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchPowerTests, TestsDynBatchPower) {} - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsDynBatchPower, MKLDNNGraphDynBatchPowerTests, - ::testing::Values( - power_test_params{ - {1, 3, 13, 13}, 1, 2, 0.5f, 3, MKLDNNPlugin::impl_desc_type::unknown, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - }, - 
[](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - }}}, - power_test_params{{1, 1, 23, 23}, 3, 8, 2, 3 }, - power_test_params{{1, 8, 23, 23}, 8, 2, 1, 3 }, - power_test_params{{1, 8, 23, 23}, 2, 2, 4, 3 } - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp deleted file mode 100644 index 9086e9a2c5d..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct relu_test_params { - // Formats: NCHW, NCDHW - vector dims; - - float n_clope; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - -template -void ref_relu(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst, relu_test_params prm) -{ - auto dims_size = src.getTensorDesc().getDims().size(); - - size_t IW = src.getTensorDesc().getDims()[dims_size - 1]; - size_t IH = src.getTensorDesc().getDims()[dims_size - 2]; - size_t ID = dims_size == 5 ? src.getTensorDesc().getDims()[dims_size - 3] : 1u; - size_t IC = src.getTensorDesc().getDims()[1]; - - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (uint32_t c = 0; c < IC; c++) { - for (uint32_t d = 0; d < ID; d++) { - for (uint32_t h = 0; h < IH; h++) { - for (uint32_t w = 0; w < IW; w++) { - uint32_t oidx = c * ID * IH * IW - + d * IH * IW - + h * IW - + w; - - dst_data[oidx] = src_data[oidx] >= 0.0 ? 
- src_data[oidx] : - src_data[oidx] * prm.n_clope; - } - } - } - } -} - -class MKLDNNGraphReluTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - -)V0G0N"; - - std::string getModel(relu_test_params p) { - std::string model = model_t; - auto dims_size = p.dims.size(); - - switch (dims_size) { - case 3: - REMOVE_LINE(model, "_IH_"); - case 4: - REMOVE_LINE(model, "_ID_"); - } - - REPLACE_WITH_NUM(model, "_IW_", p.dims[dims_size - 1]); - REPLACE_WITH_NUM(model, "_IC_", p.dims[1]); - REPLACE_WITH_NUM(model, "_IN_", p.dims[0]); - switch (dims_size) { - case 5: - REPLACE_WITH_NUM(model, "_ID_", p.dims[dims_size - 3]); - case 4: - REPLACE_WITH_NUM(model, "_IH_", p.dims[dims_size - 2]); - } - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - relu_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Activation) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_TRUE(nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() | p.selectedType); - } - } - - InferenceEngine::SizeVector dims_src = p.dims; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_relu(*srcPtr, dst_ref, p); - - compare(*output, dst_ref, 0.0005f); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphReluTests, TestsRelu) {} - - -INSTANTIATE_TEST_CASE_P( - TestsRelu, MKLDNNGraphReluTests, - ::testing::Values( - relu_test_params{ - {1, 3, 228, 228}, 0.0f, 5, MKLDNNPlugin::impl_desc_type::jit, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_TRUE(impl.getImplementationType() | MKLDNNPlugin::impl_desc_type::jit); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - 
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_TRUE(impl.getImplementationType() | MKLDNNPlugin::impl_desc_type::jit); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - }}, - relu_test_params{ - {1, 64, 32, 32, 32}, 0.0f, 3, MKLDNNPlugin::impl_desc_type::ref_any, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_TRUE(impl.getImplementationType() | MKLDNNPlugin::impl_desc_type::ref_any); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - }, - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_TRUE(impl.getImplementationType() | MKLDNNPlugin::impl_desc_type::ref_any); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - }} - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp deleted file mode 100644 index 5f43097726b..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "tests_common.hpp" -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -class MKLDNNGraphReorderTests: public TestsCommon { -protected: - virtual void SetUp() { - TestsCommon::SetUp(); - } -}; - -TEST_F(MKLDNNGraphReorderTests, cannotCreatePrimitiveDescriprorsWithoutOtherLayers) { - std::shared_ptr node; - mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0)); - - InferenceEngine::CNNLayerPtr layer(new InferenceEngine::CNNLayer({"TestReorder", "Reorder", InferenceEngine::Precision::FP32})); - MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache; - node.reset(MKLDNNPlugin::MKLDNNNode::factory().create(layer, eng, {}, cache)); - ASSERT_EQ(MKLDNNPlugin::Type::Reorder, node->getType()); - - ASSERT_THROW(node->getSupportedDescriptors(), InferenceEngine::Exception); -} - -TEST_F(MKLDNNGraphReorderTests, CreateReorder) { - std::string model = R"V0G0N( - - - - - - 1 - 9 - 16 - 32 - - - - - - - - - - - - 1 - 9 - 16 - 32 - - - - - 1 - 17 - 16 - 32 - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {(1 * 1 * 17 * 9 / 1 + 17) - * sizeof(float)}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - 
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Reorder) { - ASSERT_EQ(1, nodes[i]->getSupportedPrimitiveDescriptors().size()); - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref_any, - nodes[i]->getSupportedPrimitiveDescriptors()[0].getImplementationType()); - ASSERT_EQ(1, nodes[i]->getSupportedPrimitiveDescriptors()[0].getConfig().inConfs.size()); - if (i == 1) { - ASSERT_EQ(InferenceEngine::Layout::NCHW, nodes[i]->getSupportedPrimitiveDescriptors()[0].getConfig().inConfs[0].desc.getLayout()); - ASSERT_NE(InferenceEngine::Layout::NCHW, nodes[i]->getSupportedPrimitiveDescriptors()[0].getConfig().outConfs[0].desc.getLayout()); - } else { - ASSERT_NE(InferenceEngine::Layout::NCHW, nodes[i]->getSupportedPrimitiveDescriptors()[0].getConfig().inConfs[0].desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, nodes[i]->getSupportedPrimitiveDescriptors()[0].getConfig().outConfs[0].desc.getLayout()); - } - ASSERT_EQ(1, nodes[i]->getSupportedPrimitiveDescriptors()[0].getConfig().outConfs.size()); - } - } -} - -TEST_F(MKLDNNGraphReorderTests, CreateInPlaceReorder) { - std::string model = R"V0G0N( - - - - - - 1 - 9 - 16 - 32 - - - - - - - 1 - 9 - 16 - 32 - - - - - 32 - 144 - - - - - - - - 32 - 144 - - - - - 1 - 3 - 48 - 32 - - - - - - - 1 - 3 - 48 - 32 - - - - - 1 - 3 - 48 - 32 - - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {24}, InferenceEngine::C }); - weights->allocate(); - float *data = weights->buffer().as(); - size_t dataSize = weights->byteSize() / sizeof(float); - for (size_t i = 0; i < dataSize; i++) { - data[i] = 2; - } - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - network.addOutput("reshape1"); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {1, 9, 16, 32}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - data = src->buffer().as(); - dataSize = src->size(); - for (size_t i = 0; i < dataSize; i++) { - data[i] = 1; - } - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - auto it = out.begin(); - std::pair item = *it; - - InferenceEngine::TBlob::Ptr output1; - output1 = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output1->allocate(); - outputBlobs[item.first] = output1; - - item = *(++it); - - InferenceEngine::TBlob::Ptr output2; - output2 = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output2->allocate(); - outputBlobs[item.first] = output2; - - graph.Infer(srcs, outputBlobs); - - data = output1->data(); - for (size_t i = 0; i < output1->size(); i++) { - ASSERT_EQ(data[i], 1); - } - data = output2->data(); - for (size_t i = 0; i < output2->size(); i++) { - ASSERT_EQ(data[i], 4); - } -} diff --git 
a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp deleted file mode 100644 index 503db07e574..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" - -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct reshape_test_params { - InferenceEngine::SizeVector in; - InferenceEngine::SizeVector out; - std::vector shape; - - int axis; - int num_axes; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - -template -void ref_reshape(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst) { - const data_t *src_data = src.readOnly(); - data_t *dst_data = dst.data(); - - for (int i=0; i < src.size(); i++) - dst_data[i] = src_data[i]; -} - -class MKLDNNGraphReshapeTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - -__SRC_DIMS__ - - - - - - - - -__SRC_DIMS__ - - - - -__DST_DIMS__ - - - - - - - - -)V0G0N"; - - std::string getModel(reshape_test_params p) { - std::string model = model_t; - - std::string src_dims; - for (auto& dim : p.in) { - src_dims += " "; - src_dims += std::to_string(dim) + "\n"; - } - REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims); - - std::string dst_dims; - for (auto& dim : p.out) { - dst_dims += "\t\t"; - dst_dims += std::to_string(dim) + "\n"; - } - REPLACE_WITH_STR(model, "__DST_DIMS__", dst_dims); - - REPLACE_WITH_NUM(model, "_AX_", p.axis); - REPLACE_WITH_NUM(model, "_NAX_", p.num_axes); - - std::string shape_str; - for (auto& dim : p.shape) { - if (!shape_str.empty()) - shape_str += ","; - shape_str += std::to_string(dim); - } - REPLACE_WITH_STR(model, "_SHAPE_", shape_str); - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - reshape_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Reshape) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType()); - } - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, p.in, - InferenceEngine::TensorDesc::getLayoutByDims(p.in)}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out 
= network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_reshape(*srcPtr, dst_ref); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphReshapeTests, TestsReshape) {} - - -INSTANTIATE_TEST_CASE_P( - TestsReshape, MKLDNNGraphReshapeTests, - ::testing::Values( - reshape_test_params{ {1, 3, 228, 228}, {1, 24, 2, 3249}, {1, 24, 2, 3249}, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown, { [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 4 },{ 2, 2 },{ 2, 2 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::C, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 4 },{ 1, 2, 2 },{ 1, 2, 2 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::C, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::CHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 4 },{ 1, 4, 1, 1 },{ 1, 4, 1, 1 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::C, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 4, 4 },{ 1, 4, 4 },{ 1, 4, 4 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::CHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 4, 4 },{ 1, 4, 2, 2 },{ 1, 4, 2, 2 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, 
impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 4, 2, 2 },{ 1, 4, 2, 2 },{ 1, 4, 2, 2 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::CHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 2, 2 },{ 4 },{ 4 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::C, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 1, 2, 2 },{ 4 },{ 4 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::CHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::C, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 1, 1, 2, 2 },{ 4 },{ 4 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::C, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 4, 2, 2 },{ 4, 4 },{ 4, 4 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::CHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 1, 4, 2, 2 },{ 4, 4 },{ 4, 4 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 1, 4, 2, 2 },{ 4, 2, 2 },{ 4, 2, 2 }, 0, -1, 1, - 
MKLDNNPlugin::impl_desc_type::unknown,{ [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::CHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 1, 4, 2, 2 }, { 4, 2, 2, 1, 1 }, { 4, 2, 2, 1, 1 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown, { [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 4, 2, 2, 1, 1 }, { 1, 4, 2, 2 }, { 1, 4, 2, 2 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown, { [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } }, - reshape_test_params{ { 1, 200 }, { 1, 200, 1, 1, 1 }, { 1, 200, 1, 1, 1 }, 0, -1, 1, - MKLDNNPlugin::impl_desc_type::unknown, { [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NC, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } } } -)); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp deleted file mode 100644 index 0ffdfed272a..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp +++ /dev/null @@ -1,313 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct roi_pooling_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in1; - - struct { - size_t n; - size_t c; - } in2; - - size_t pooled_h; - size_t pooled_w; - float spatial_scale; - - size_t num_prim_desc; - - int selectedType; - - std::vector> comp; -}; - -template -void ref_roipooling(const InferenceEngine::TBlob &src, const InferenceEngine::TBlob &roi, - InferenceEngine::TBlob &dst_blob, roi_pooling_test_params& params) { - data_t* dst = dst_blob.data(); - const data_t* src_data = src.readOnly(); - const data_t* src_roi = roi.readOnly(); - - int C = src.getTensorDesc().getDims()[1]; - int H = src.getTensorDesc().getDims()[2]; - int W = src.getTensorDesc().getDims()[3]; - - int ROIS = 
roi.getTensorDesc().getDims()[0]; - - double spatial_scale = params.spatial_scale; - int pooled_h = params.pooled_h; - int pooled_w = params.pooled_w; - - auto *arg_max_ = new data_t[dst_blob.size()]; - - for (size_t i = 0; i < dst_blob.size(); i++) { - arg_max_[i] = -1; - dst[i] = -FLT_MAX; - } - - int roi_off; - - for (int n = 0; n < ROIS; ++n) { - if(roi.getTensorDesc().getDims().size() == 4) { - roi_off = n*roi.getTensorDesc().getDims()[1]*roi.getTensorDesc().getDims()[2]*roi.getTensorDesc().getDims()[3]; - } - else { - roi_off = n*roi.getTensorDesc().getDims()[1]; - } - - const data_t* src_roi_ptr = &src_roi[roi_off]; - - int roi_batch_ind = src_roi_ptr[0]; - int roi_start_w = round(src_roi_ptr[1] * spatial_scale); - int roi_start_h = round(src_roi_ptr[2] * spatial_scale); - int roi_end_w = round(src_roi_ptr[3] * spatial_scale); - int roi_end_h = round(src_roi_ptr[4] * spatial_scale); - - int roi_height = (std::max)(roi_end_h - roi_start_h + 1, 1); - int roi_width = (std::max)(roi_end_w - roi_start_w + 1, 1); - - for (int c = 0; c < C; ++c) { - - for (int ph = 0; ph < pooled_h; ++ph) { - for (int pw = 0; pw < pooled_w; ++pw) { - int hstart = (ph * roi_height) / pooled_h; - if ( (hstart * pooled_h) > (ph * roi_height) ) { - --hstart; - } - - int wstart = (pw * roi_width) / pooled_w; - if ( (wstart * pooled_w) > (pw * roi_width) ) { - --wstart; - } - - int hend = ((ph + 1) * roi_height) / pooled_h; - if ( (hend * pooled_h) < ((ph + 1) * roi_height) ) { - ++hend; - } - - int wend = ((pw + 1) * roi_width) / pooled_w; - if ( (wend * pooled_w) < ((pw + 1) * roi_width) ) { - ++wend; - } - - hstart = (std::min)((std::max)(hstart + roi_start_h, 0), H); - hend = (std::min)((std::max)(hend + roi_start_h, 0), H); - wstart = (std::min)((std::max)(wstart + roi_start_w, 0), W); - wend = (std::min)((std::max)(wend + roi_start_w, 0), W); - - bool is_empty = (hend <= hstart) || (wend <= wstart); - - const int pool_index = n*dst_blob.getTensorDesc().getDims()[3]*dst_blob.getTensorDesc().getDims()[2]*dst_blob.getTensorDesc().getDims()[1] + - c*dst_blob.getTensorDesc().getDims()[3]*dst_blob.getTensorDesc().getDims()[2] + ph*dst_blob.getTensorDesc().getDims()[3] + pw; - - if (is_empty) { - dst[pool_index] = 0; - arg_max_[pool_index] = -1; - } - - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - int src_index_data = roi_batch_ind*src.getTensorDesc().getDims()[1]*src.getTensorDesc().getDims()[2]*src.getTensorDesc().getDims()[3] + - c*src.getTensorDesc().getDims()[2]*src.getTensorDesc().getDims()[3] + h*src.getTensorDesc().getDims()[3] + w; - data_t batch_data = src_data[src_index_data]; - - if (batch_data > dst[pool_index]) { - dst[pool_index] = batch_data; - arg_max_[pool_index] = batch_data; - } - } - } - } - } - } - } - delete[] arg_max_; -} - -class MKLDNNGraphRoiPoolingTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN1_ - _IC1_ - _IH1_ - _IW1_ - - - - - - - _IN2_ - _IC2_ - - - - - - - - _IN1_ - _IC1_ - _IH1_ - _IW1_ - - - _IN2_ - _IC2_ - - - - - _ON_ - _OC_ - _OH_ - _OW_ - - - - - - - - - -)V0G0N"; - - std::string getModel(roi_pooling_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW1_", p.in1.w); - REPLACE_WITH_NUM(model, "_IH1_", p.in1.h); - REPLACE_WITH_NUM(model, "_IC1_", p.in1.c); - REPLACE_WITH_NUM(model, "_IN1_", p.in1.n); - - REPLACE_WITH_NUM(model, "_IC2_", p.in2.c); - REPLACE_WITH_NUM(model, "_IN2_", p.in2.n); - - REPLACE_WITH_NUM(model, "_OW_", p.pooled_w); - 
REPLACE_WITH_NUM(model, "_OH_", p.pooled_h); - REPLACE_WITH_NUM(model, "_OC_", (std::max)(p.in1.c, p.in2.c)); - REPLACE_WITH_NUM(model, "_ON_", (std::max)(p.in1.n, p.in2.n)); - - REPLACE_WITH_NUM(model, "_PH_", p.pooled_h); - REPLACE_WITH_NUM(model, "_PW_", p.pooled_w); - REPLACE_WITH_NUM(model, "_SS_", p.spatial_scale); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - roi_pooling_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::ROIPooling) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - InferenceEngine::SizeVector dims_src = {p.in1.n, p.in1.c, p.in1.h, p.in1.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::SizeVector dims_roi = {p.in2.n, p.in2.c}; - - InferenceEngine::Blob::Ptr roi = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_roi, InferenceEngine::NC}); - roi->allocate(); - fill_data(roi->buffer(), roi->size()); - - InferenceEngine::TBlob* roiPtr = dynamic_cast*>(roi.get()); - - if (roiPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - srcs.insert(std::pair("in2", roi)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_roipooling(*srcPtr, *roiPtr, dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphRoiPoolingTests, TestsRoiPooling) {} - -const size_t expect_num_impl = 1; - -INSTANTIATE_TEST_CASE_P( - TestsRoiPooling, MKLDNNGraphRoiPoolingTests, - ::testing::Values( - roi_pooling_test_params{ - {1, 256, 39, 64}, // in1 - {150, 5}, // in2 - 6, 6, // pool H and W - 0.0625f, // spatial_scale - expect_num_impl, // num_prim_desc (platform dependent) - MKLDNNPlugin::impl_desc_type::jit - })); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp deleted file mode 100644 index ca9bef048a6..00000000000 --- 
a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp +++ /dev/null @@ -1,473 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" -#include - -#include "single_layer_common.hpp" -#include "tests_common.hpp" - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct simplernms_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in_cls; - - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in_delta; - - struct { - size_t n; - size_t c; - } in_info; - - struct { - size_t n; - size_t c; - } out; - - size_t minBoxSize; - size_t featStride; - size_t preNmsTopn; - size_t postNmsTopn; - float iouThreshold; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - -struct anchor { float start_x; float start_y; float end_x; float end_y; }; - -template -struct simpler_nms_roi_t -{ - data_t x0, y0, x1, y1; - - constexpr static inline const data_t clamp_v(const data_t v, const data_t v_min, const data_t v_max) - { - return (std::max)(v_min, (std::min)(v, v_max)); - } - - data_t area() const { return std::max(0, y1 - y0 + 1) * std::max(0, x1 - x0 + 1); } - - simpler_nms_roi_t intersect (simpler_nms_roi_t other) const - { - return - { - (std::max)(x0, other.x0), - (std::max)(y0, other.y0), - (std::min)(x1, other.x1), - (std::min)(y1, other.y1) - }; - } - simpler_nms_roi_t clamp (simpler_nms_roi_t other) const - { - return - { - clamp_v(x0, other.x0, other.x1), - clamp_v(y0, other.y0, other.y1), - clamp_v(x1, other.x0, other.x1), - clamp_v(y1, other.y0, other.y1) - }; - } -}; - -template -struct simpler_nms_proposal_t { simpler_nms_roi_t roi; data_t confidence; size_t ord; }; -template -struct simpler_nms_delta_t { data_t shift_x, shift_y, log_w, log_h; }; - -template -inline simpler_nms_roi_t simpler_nms_gen_bbox( - const anchor& box, - const simpler_nms_delta_t& delta, - int anchor_shift_x, - int anchor_shift_y) -{ - auto anchor_w = box.end_x - box.start_x + 1; - auto anchor_h = box.end_y - box.start_y + 1; - auto center_x = box.start_x + anchor_w * .5f; - auto center_y = box.start_y + anchor_h *.5f; - - data_t pred_center_x = delta.shift_x * anchor_w + center_x + anchor_shift_x; - data_t pred_center_y = delta.shift_y * anchor_h + center_y + anchor_shift_y; - data_t half_pred_w = exp(delta.log_w) * anchor_w * .5f; - data_t half_pred_h = exp(delta.log_h) * anchor_h * .5f; - - return { pred_center_x - half_pred_w, - pred_center_y - half_pred_h, - pred_center_x + half_pred_w, - pred_center_y + half_pred_h }; -} -template -inline void sort_and_keep_at_most_top_n(std::vector>& proposals, size_t top_n) -{ - const auto cmp_fn = [](const simpler_nms_proposal_t& a, - const simpler_nms_proposal_t& b) - { - return a.confidence > b.confidence || (a.confidence == b.confidence && a.ord > b.ord); - }; - - if (proposals.size() > top_n) { - std::partial_sort(proposals.begin(), proposals.begin() + top_n, proposals.end(), cmp_fn); - proposals.resize(top_n); - } - else { - std::sort(proposals.begin(), proposals.end(), cmp_fn); - } -} - -template -std::vector> simpler_nms_perform_nms(const std::vector>& proposals, - float iou_threshold, size_t top_n) { - //TODO(ruv): can I mark the 1st arg, proposals as const? ifndef DONT_PRECALC_AREA, i can - //TODO(ruv): is it better to do the precalc or not? 
since we need to fetch the floats from memory anyway for - - // intersect calc, it's only a question of whether it's faster to do (f-f)*(f-f) or fetch another val -#define DONT_PRECALC_AREA - -#ifndef DONT_PRECALC_AREA - std::vector areas; - areas.reserve(proposals.size()); - std::transform(proposals.begin(), proposals.end(), areas.begin(), [](const simpler_nms_proposals_t>& v) - { - return v.roi.area(); - }); -#endif - - std::vector> res; - res.reserve(top_n); -#ifdef DONT_PRECALC_AREA - for (const auto & prop : proposals) { - const auto bbox = prop.roi; - const data_t area = bbox.area(); -#else - size_t proposal_count = proposals.size(); - for (size_t proposalIndex = 0; proposalIndex < proposal_count; ++proposalIndex) { - const auto & bbox = proposals[proposalIndex].roi; -#endif - - // For any realistic WL, this condition is true for all top_n values anyway - if (prop.confidence > 0) { - bool overlaps = std::any_of(res.begin(), res.end(), [&](const simpler_nms_roi_t& res_bbox) - { - data_t interArea = bbox.intersect(res_bbox).area(); -#ifdef DONT_PRECALC_AREA - data_t unionArea = res_bbox.area() + area - interArea; -#else - data_t unionArea = res_bbox.area() + areas[proposalIndex] - interArea; -#endif - return interArea > iou_threshold * unionArea; - }); - - if (! overlaps) { - res.push_back(bbox); - if (res.size() == top_n) break; - } - } - } - - return res; -} - -template -void ref_simplernms(const InferenceEngine::TBlob &src_cls, const InferenceEngine::TBlob &src_delta, const InferenceEngine::TBlob &src_info, InferenceEngine::TBlob &dst_blob, simplernms_test_params prm) { - int anchors_num = 3 * 3; - data_t *anchors_ = new data_t[anchors_num * sizeof(anchor) / sizeof(float)]; - const anchor* anchors = (anchor*)anchors_; - - IE_ASSERT(src_cls.getTensorDesc().getDims().size() == 4); - int H = src_cls.getTensorDesc().getDims()[2]; - int W = src_cls.getTensorDesc().getDims()[3]; - - int SZ = H * W; - - data_t* dst = dst_blob.data(); - - const data_t* cls_scores = src_cls.readOnly(); - const data_t* delta_pred = src_delta.readOnly(); - const data_t* im_info = src_info.readOnly(); - - int IW = im_info[0]; - int IH = im_info[1]; - int IS = im_info[2]; - - int scaled_min_bbox_size = prm.minBoxSize * IS; - - std::vector> sorted_proposals_confidence; - - for (auto y = 0; y < H; ++y) - { - int anchor_shift_y = y * prm.featStride; - - for (auto x = 0; x < W; ++x) { - int anchor_shift_x = x * prm.featStride; - int location_index = y * W + x; - - // we assume proposals are grouped by window location - for (int anchor_index = 0; anchor_index < anchors_num ; anchor_index++) { - data_t dx0 = delta_pred[location_index + SZ * (anchor_index * 4 + 0)]; - data_t dy0 = delta_pred[location_index + SZ * (anchor_index * 4 + 1)]; - data_t dx1 = delta_pred[location_index + SZ * (anchor_index * 4 + 2)]; - data_t dy1 = delta_pred[location_index + SZ * (anchor_index * 4 + 3)]; - - simpler_nms_delta_t bbox_delta { dx0, dy0, dx1, dy1 }; - - data_t proposal_confidence = cls_scores[location_index + SZ * (anchor_index + anchors_num * 1)]; - - simpler_nms_roi_t tmp_roi = simpler_nms_gen_bbox(anchors[anchor_index], bbox_delta, anchor_shift_x, anchor_shift_y); - simpler_nms_roi_t roi = tmp_roi.clamp({ 0, 0, data_t(IW - 1), data_t(IH - 1) }); - - int bbox_w = roi.x1 - roi.x0 + 1; - int bbox_h = roi.y1 - roi.y0 + 1; - - if (bbox_w >= scaled_min_bbox_size && bbox_h >= scaled_min_bbox_size) { - simpler_nms_proposal_t proposal { roi, proposal_confidence, sorted_proposals_confidence.size() }; - 
sorted_proposals_confidence.push_back(proposal); - } - } - } - } - - sort_and_keep_at_most_top_n(sorted_proposals_confidence, prm.preNmsTopn); - auto res = simpler_nms_perform_nms(sorted_proposals_confidence, prm.iouThreshold, prm.postNmsTopn); - - size_t res_num_rois = res.size(); - - for (size_t i = 0; i < res_num_rois; ++i) { - dst[5 * i + 0] = 0; // roi_batch_ind, always zero on test time - dst[5 * i + 1] = res[i].x0; - dst[5 * i + 2] = res[i].y0; - dst[5 * i + 3] = res[i].x1; - dst[5 * i + 4] = res[i].y1; - } - - delete[] anchors_; -} - -class MKLDNNGraphSimplerNMSTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _INC_ - _ICC_ - _IHC_ - _IWC_ - - - - - - - _IND_ - _ICD_ - _IHD_ - _IWD_ - - - - - - - _INI_ - _ICI_ - - - - - - - - - _INC_ - _ICC_ - _IHC_ - _IWC_ - - - _IND_ - _ICD_ - _IHD_ - _IWD_ - - - _INI_ - _ICI_ - - - - - _ON_ - _OC_ - - - - - - - - - - -)V0G0N"; - - std::string getModel(simplernms_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IWC_", p.in_cls.w); - REPLACE_WITH_NUM(model, "_IHC_", p.in_cls.h); - REPLACE_WITH_NUM(model, "_ICC_", p.in_cls.c); - REPLACE_WITH_NUM(model, "_INC_", p.in_cls.n); - - REPLACE_WITH_NUM(model, "_IWD_", p.in_delta.w); - REPLACE_WITH_NUM(model, "_IHD_", p.in_delta.h); - REPLACE_WITH_NUM(model, "_ICD_", p.in_delta.c); - REPLACE_WITH_NUM(model, "_IND_", p.in_delta.n); - - REPLACE_WITH_NUM(model, "_ICI_", p.in_info.c); - REPLACE_WITH_NUM(model, "_INI_", p.in_info.n); - - REPLACE_WITH_NUM(model, "_OC_", p.out.c); - REPLACE_WITH_NUM(model, "_ON_", p.out.n); - - REPLACE_WITH_NUM(model, "_MIN_BOX_SIZE_", p.minBoxSize); - REPLACE_WITH_NUM(model, "_FSRD_", p.featStride); - REPLACE_WITH_NUM(model, "_PRENT_", p.preNmsTopn); - REPLACE_WITH_NUM(model, "_POSTNT_", p.postNmsTopn); - REPLACE_WITH_NUM(model, "_IOU_THRESHOLD_", p.iouThreshold); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - simplernms_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::SimplerNMS) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType()); - } - } - InferenceEngine::SizeVector dims_src_cls = {p.in_cls.n, p.in_cls.c, p.in_cls.h, p.in_cls.w}; - - InferenceEngine::Blob::Ptr src_cls = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src_cls, InferenceEngine::NCHW}); - src_cls->allocate(); - fill_data(src_cls->buffer(), src_cls->size()); - - InferenceEngine::TBlob* srcClsPtr = dynamic_cast*>(src_cls.get()); - - if (srcClsPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::SizeVector dims_delta = {p.in_delta.n, p.in_delta.c, p.in_delta.h, p.in_delta.w}; - - InferenceEngine::Blob::Ptr src_delta = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_delta, 
InferenceEngine::NCHW}); - src_delta->allocate(); - fill_data(src_delta->buffer(), src_delta->size()); - - InferenceEngine::TBlob* srcDeltaPtr = dynamic_cast*>(src_delta.get()); - - if (srcDeltaPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::SizeVector dims_info = {p.in_info.n, p.in_info.c}; - - InferenceEngine::Blob::Ptr src_info = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_info, InferenceEngine::NC}); - src_info->allocate(); - fill_data(src_info->buffer(), src_info->size()); - float * data_info = src_info->buffer(); - data_info[0] = 20; - data_info[1] = 20; - data_info[2] = 3; - - InferenceEngine::TBlob* srcInfoPtr = dynamic_cast*>(src_info.get()); - - if (srcInfoPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src_cls)); - srcs.insert(std::pair("in2", src_delta)); - srcs.insert(std::pair("in3", src_info)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_simplernms(*srcClsPtr, *srcDeltaPtr, *srcInfoPtr, dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphSimplerNMSTests, TestsSimplerNMS) {} - - -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsSimplerNMS, MKLDNNGraphSimplerNMSTests, - ::testing::Values( - simplernms_test_params{{1, 18, 39, 64}, {1, 36, 39, 64}, {1, 3}, {150, 5}, 16, 16, 6000, 150, 0.7f, 1, - MKLDNNPlugin::impl_desc_type::ref, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType()); - ASSERT_EQ(3, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(1).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(2).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - }})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp deleted file mode 100644 index c7c941bacf6..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp +++ /dev/null @@ -1,419 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct softmax_test_params { - // Formats: NCHW, NCDHW - vector dims; - - int axis; - - size_t num_prim_desc; - - int selectedType; - std::vector preferTypes; - - std::vector> comp; -}; - -template -void check_softmax_fwd(const InferenceEngine::TBlob &src, softmax_test_params prm) -{ - const data_t *src_data = src.readOnly(); - - auto dims_size = 
prm.dims.size(); - - int axis = prm.axis; - if (dims_size == 4 && axis > 1) - axis++; - - size_t W = prm.dims[dims_size - 1]; - size_t H = prm.dims[dims_size - 2]; - size_t D = dims_size == 5 ? prm.dims[dims_size - 3] : 1u; - size_t C = prm.dims[1]; - size_t MB = prm.dims[0]; - - auto off = [=](int n, int c, int d, int h, int w) - { - return (n * W * H * D * C + c * W * H * D + d * W * H + h * W + w); - }; - - auto check_norm = [=](double res) { - if(res < 0.999f || res > 1.001) { - ASSERT_TRUE(res > 0.99f && res < 1.01); - } - }; - - if(axis == 0) { - for (int c = 0; c < C; ++c) { - for (int d = 0; d < D; ++d) { - for (int h = 0; h < H; ++h) { - for (int w = 0; w < W; ++w) { - double result = 0.0f; - - for (int n = 0; n < MB; ++n) { - result += src_data[off(n, c, d, h, w)]; - } - check_norm(result); - } - } - } - } - } - else if(axis == 1) { - for (int n = 0; n < MB; ++n) { - for (int d = 0; d < D; ++d) { - for (int h = 0; h < H; ++h) { - for (int w = 0; w < W; ++w) { - double result = 0.0f; - - for (int c = 0; c < C; ++c) { - result += src_data[off(n, c, d, h, w)];//dst_ptr[map_index(dst_pd, off(n, c, h, w))]; - } - - check_norm(result); - } - } - } - } - } - else if(axis == 2) { - for (int n = 0; n < MB; ++n) { - for (int c = 0; c < C; ++c) { - for (int h = 0; h < H; ++h) { - for (int w = 0; w < W; ++w) { - double result = 0.0f; - - for (int d = 0; d < D; ++d) { - result += src_data[off(n, c, d, h, w)];//dst_ptr[map_index(dst_pd, off(n, c, h, w))]; - } - - check_norm(result); - } - } - } - } - } - else if(axis == 3) { - for (int n = 0; n < MB; ++n) { - for (int c = 0; c < C; ++c) { - for (int d = 0; d < D; ++d) { - for (int w = 0; w < W; ++w) { - double result = 0.0f; - - for (int h = 0; h < H; ++h) { - result += src_data[off(n, c, d, h, w)];//dst_ptr[map_index(dst_pd, off(n, c, h, w))]; - } - - check_norm(result); - } - } - } - } - } - else if(axis == 4) { - for (int n = 0; n < MB; ++n) { - for (int c = 0; c < C; ++c) { - for (int d = 0; d < D; ++d) { - for (int h = 0; h < H; ++h) { - double result = 0.0f; - - for (int w = 0; w < W; ++w) { - result += src_data[off(n, c, d, h, w)];//dst_ptr[map_index(dst_pd, off(n, c, h, w))]; - } - - check_norm(result); - } - } - } - } - } -} - -class MKLDNNGraphSoftMaxTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(softmax_test_params p) { - std::string model = model_t; - - auto dims_size = p.dims.size(); - switch (dims_size) { - case 3: - REMOVE_LINE(model, "_IH_"); - case 4: - REMOVE_LINE(model, "_ID_"); - } - - REPLACE_WITH_NUM(model, "_IW_", p.dims[dims_size - 1]); - REPLACE_WITH_NUM(model, "_IC_", p.dims[1]); - REPLACE_WITH_NUM(model, "_IN_", p.dims[0]); - switch (dims_size) { - case 5: - REPLACE_WITH_NUM(model, "_ID_", p.dims[dims_size - 3]); - case 4: - REPLACE_WITH_NUM(model, "_IH_", p.dims[dims_size - 2]); - } - - REPLACE_WITH_NUM(model, "_AX_", p.axis); - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - softmax_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = 
getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::SoftMax) { - ASSERT_LE(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType() & p.selectedType); - } - } - - InferenceEngine::SizeVector dims_src = p.dims; - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - check_softmax_fwd(*output, p); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphSoftMaxTests, TestsSoftMax) {} - - -INSTANTIATE_TEST_CASE_P( - TestsSoftMax, MKLDNNGraphSoftMaxTests, - ::testing::Values( - softmax_test_params{{1, 3, 228, 228}, 1, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{1, 3, 228, 228}, 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - softmax_test_params{{1, 100, 6, 1}, 1, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{1, 100, 6, 1}, 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - softmax_test_params{{1, 1000, 1, 1}, 1, 1, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{8, 1000, 1, 1}, 1, 1, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{1, 19, 128, 128}, 1, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{1, 19, 128, 128}, 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, -// softmax_test_params{{8, 100, 81, 1}, 2, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{8, 100, 81, 1}, 2, 1, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - softmax_test_params{{1, 1, 1, 1}, 3, 1, MKLDNNPlugin::impl_desc_type::jit}, -// softmax_test_params{{1, 1, 1, 33}, 3, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{1, 1, 1, 33}, 3, 1, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, -// softmax_test_params{{8, 1, 10, 81}, 3, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{8, 1, 10, 81}, 3, 1, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - softmax_test_params{{2, 5, 9, 10, 11}, 0, 2, 
MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{2, 5, 9, 10, 11}, 1, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{2, 5, 9, 10, 11}, 2, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{2, 5, 9, 10, 11}, 3, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{2, 5, 9, 10, 11}, 4, 1, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}} - )); - -class MKLDNNGraphDynBatchSoftMaxTests: public MKLDNNGraphSoftMaxTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - softmax_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - InferenceEngine::SizeVector dims_src = p.dims; - size_t MB = dims_src[0]; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::Layout layout = InferenceEngine::ANY; - switch (p.dims.size()) { - case 4: - layout = InferenceEngine::NCHW; - break; - case 5: - layout = InferenceEngine::NCDHW; - break; - } - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, layout}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkSoftmax = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::SoftMax; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkSoftmax); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkSoftmax); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchSoftMaxTests, TestsDynBatchSoftMax) {} - - -INSTANTIATE_TEST_CASE_P( - TestsDynBatchSoftMax, MKLDNNGraphDynBatchSoftMaxTests, - ::testing::Values( - // TODO: rewrite to ngraph to have reshape functionality - // softmax_test_params{{1, 3, 228, 228}, 1, 2, MKLDNNPlugin::impl_desc_type::jit}, - // softmax_test_params{{1, 3, 228, 228}, 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // softmax_test_params{{1, 100, 6, 1}, 1, 2, MKLDNNPlugin::impl_desc_type::jit}, - // softmax_test_params{{1, 100, 6, 1}, 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // softmax_test_params{{1, 1000, 1, 1}, 1, 1, MKLDNNPlugin::impl_desc_type::ref}, - // softmax_test_params{{1, 19, 128, 128}, 1, 2, MKLDNNPlugin::impl_desc_type::jit}, - // softmax_test_params{{1, 19, 128, 128}, 1, 2, 
MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - // softmax_test_params{{1, 1, 1, 1}, 3, 1, MKLDNNPlugin::impl_desc_type::ref}, - // softmax_test_params{{1, 1, 1, 33}, 3, 1, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - softmax_test_params{{8, 1000, 1, 1}, 1, 1, MKLDNNPlugin::impl_desc_type::ref}, -// softmax_test_params{{8, 100, 81, 1}, 2, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{8, 100, 81, 1}, 2, 1, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, -// softmax_test_params{{1, 1, 1, 33}, 3, 2, MKLDNNPlugin::impl_desc_type::jit}, -// softmax_test_params{{8, 1, 10, 81}, 3, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{8, 1, 10, 81}, 3, 1, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}}, - softmax_test_params{{2, 5, 9, 10, 11}, 1, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{2, 5, 9, 10, 11}, 2, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{2, 5, 9, 10, 11}, 3, 2, MKLDNNPlugin::impl_desc_type::jit}, - softmax_test_params{{2, 5, 9, 10, 11}, 4, 1, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref_any}} - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp deleted file mode 100644 index 3a3f69ab563..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp +++ /dev/null @@ -1,501 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" - -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct split_test_params { - // Formats: NCHW, NCDHW - vector dims; - std::vector> outs; - - int axis; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - std::vector preferTypes; - - std::vector> comp; -}; - -template -void ref_split(InferenceEngine::TBlob &src, std::vector>& dsts, split_test_params& prm) { - const float * srcData = src.readOnly(); - - int outerSize = 1; - for (int i = 0; i < prm.axis; i++) - outerSize *= src.getTensorDesc().getDims()[i]; - - for (size_t osIdx = 0; osIdx < outerSize; osIdx++) { - for (size_t dstIdx = 0; dstIdx < dsts.size(); dstIdx++) { - float* dstData = dsts[dstIdx].data(); - int innerSize = dsts[dstIdx].size() / outerSize; - - for (size_t j = 0; j < innerSize; j++, srcData++) { - dstData[osIdx*innerSize + j] = *srcData; - } - } - } -} - -class MKLDNNGraphSplitTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - - - - - _IN_ - _IC_ - _ID_ - _IH_ - _IW_ - - - - _OP_ - - - - - - - -)V0G0N"; - - std::string port_t = R"V0G0N( - - _N_ - _C_ - _D_ - _H_ - _W_ - -)V0G0N"; - -protected: - std::string getModel(split_test_params p) { - std::string model = model_t; - auto dims_size = p.dims.size(); - - switch (dims_size) { - case 3: - REMOVE_LINE(model, "_IH_"); - case 4: - REMOVE_LINE(model, "_ID_"); - } - REPLACE_WITH_NUM(model, "_IN_", p.dims[0]); - REPLACE_WITH_NUM(model, "_IC_", p.dims[1]); - REPLACE_WITH_NUM(model, "_IW_", p.dims[dims_size - 1]); - switch (dims_size) { - case 5: - REPLACE_WITH_NUM(model, "_ID_", p.dims[dims_size - 3]); - case 4: - 
REPLACE_WITH_NUM(model, "_IH_", p.dims[dims_size - 2]); - } - - std::string outPorts; - for (int idx = 0; idx < p.outs.size(); idx++) { - std::string outPort = port_t; - switch (dims_size) { - case 3: - REMOVE_LINE(outPort, "_H_"); - case 4: - REMOVE_LINE(outPort, "_D_"); - } - REPLACE_WITH_NUM(outPort, "_ID_", idx); - REPLACE_WITH_NUM(outPort, "_N_", p.outs[idx][0]); - REPLACE_WITH_NUM(outPort, "_C_", p.outs[idx][1]); - REPLACE_WITH_NUM(outPort, "_W_", p.outs[idx][dims_size - 1]); - switch (dims_size) { - case 5: - REPLACE_WITH_NUM(outPort, "_D_", p.outs[idx][dims_size - 3]); - case 4: - REPLACE_WITH_NUM(outPort, "_H_", p.outs[idx][dims_size - 2]); - } - - outPorts += outPort; - } - REPLACE_WITH_STR(model, "_OP_", outPorts); - - REPLACE_WITH_NUM(model, "_AXIS_", p.axis); - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - return model; - } - - virtual void TearDown() { - } - - virtual void SetUp() { - try { - split_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Split) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType()); - } - } - ASSERT_LE(3, nodes.size()); - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(network.getInputsInfo().begin()->second->getTensorDesc()); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - auto srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - std::vector> dst_refs; - for (auto& item : out) { - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - dst_refs.push_back(dst_ref); - } - - graph.Infer(srcs, outputBlobs); - - ref_split(*srcPtr, dst_refs, p); - - int ref_idx = 0; - for (auto& output : outputBlobs) { - compare(*output.second, dst_refs[ref_idx++], 0.0005f); - } - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphSplitTests, TestsSplit) {} - -INSTANTIATE_TEST_CASE_P( - TestsSplit, MKLDNNGraphSplitTests, - ::testing::Values( - split_test_params { - {1, 24, 2, 5}, - {{1, 16, 2, 5}, {1, 8, 2, 5}}, - 1, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {1, 20, 2, 5}, - {{1, 13, 2, 5}, {1, 7, 2, 5}}, - 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - 
split_test_params { - {1, 20, 2, 5}, - {{1, 10, 2, 5}, {1, 10, 2, 5}}, - 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {2, 20, 2, 5}, - {{2, 10, 2, 5}, {2, 10, 2, 5}}, - 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {2, 20, 2, 5}, - {{2, 15, 2, 5}, {2, 5, 2, 5}}, - 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {9, 11, 7, 5}, - {{3, 11, 7, 5}, {6, 11, 7, 5}}, - 0, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {3, 11, 7, 5}, - {{3, 11, 4, 5}, {3, 11, 3, 5}}, - 2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {3, 11, 7, 5}, - {{3, 11, 7, 1}, {3, 11, 7, 4}}, - 3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {5, 6, 7, 15}, - {{1, 6, 7, 15}, {2, 6, 7, 15}, {1, 6, 7, 15}, {1, 6, 7, 15}}, - 0, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {5, 6, 7, 15}, - {{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}}, - 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {5, 6, 7, 15}, - {{5, 6, 3, 15}, {5, 6, 1, 15}, {5, 6, 2, 15}, {5, 6, 1, 15}}, - 2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {5, 6, 7, 15}, - {{5, 6, 7, 5}, {5, 6, 7, 3}, {5, 6, 7, 4}, {5, 6, 7, 3}}, - 3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {5, 6, 7, 15}, - {{5, 6, 7, 15}}, - 1, 4, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}, - split_test_params { - {1, 32, 16, 16, 16}, - {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}}, - 1, 6, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}}, - split_test_params { - {1, 32, 16, 16, 16}, - {{1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}, {1, 8, 16, 16, 16}}, - 1, 6, MKLDNNPlugin::impl_desc_type::unknown, {}})); - -class MKLDNNGraphDynBatchSplitTests: public MKLDNNGraphSplitTests { -protected: - virtual void SetUp() { - try { - split_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.dims[0]; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(network.getInputsInfo().begin()->second->getTensorDesc()); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - auto* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::OutputsDataMap out; - out 
= network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - auto it = out.begin(); - - std::pair item = *it; - - InferenceEngine::TBlob::Ptr output1; - output1 = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output1->allocate(); - outputBlobs[item.first] = output1; - - item = *(++it); - InferenceEngine::TBlob::Ptr output2; - output2 = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output2->allocate(); - outputBlobs[item.first] = output2; - - auto checkSplit = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Split; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkSplit); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkSplit); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchSplitTests, TestsDynBatchSplit) {} - -INSTANTIATE_TEST_CASE_P( - TestsDynBatchSplit, MKLDNNGraphDynBatchSplitTests, - ::testing::Values( - // TODO: rewrite to ngraph to have reshape functionality - // split_test_params { - // {1, 24, 2, 5}, - // {{1, 16, 2, 5}, {1, 8, 2, 5}}, - // 1, 3, MKLDNNPlugin::impl_desc_type::unknown, {}, { - // [](MKLDNNPlugin::PrimitiveDescInfo impl) { - // ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType()); - // ASSERT_EQ(1, impl.getConfig().inConfs.size()); - // ASSERT_EQ(2, impl.getConfig().outConfs.size()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout()); - // }, - // [](MKLDNNPlugin::PrimitiveDescInfo impl) { - // ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - // ASSERT_EQ(1, impl.getConfig().inConfs.size()); - // ASSERT_EQ(2, impl.getConfig().outConfs.size()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout()); - // }, - // [](MKLDNNPlugin::PrimitiveDescInfo impl) { - // ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - // ASSERT_EQ(1, impl.getConfig().inConfs.size()); - // ASSERT_EQ(2, impl.getConfig().outConfs.size()); - // ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(1).desc.getLayout()); - // } - // } - // }, - // TODO: rewrite to ngraph to have reshape functionality - // split_test_params { - // {1, 20, 2, 5}, - // {{1, 13, 2, 5}, {1, 7, 2, 5}}, - // 1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, { - // [](MKLDNNPlugin::PrimitiveDescInfo impl) { - // ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType()); - // ASSERT_EQ(1, impl.getConfig().inConfs.size()); - // ASSERT_EQ(2, impl.getConfig().outConfs.size()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout()); - // }, - // 
[](MKLDNNPlugin::PrimitiveDescInfo impl) { - // ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - // ASSERT_EQ(1, impl.getConfig().inConfs.size()); - // ASSERT_EQ(2, impl.getConfig().outConfs.size()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout()); - // } - // } - // }, - // TODO: rewrite to ngraph to have reshape functionality - // split_test_params { - // {1, 20, 2, 5}, - // {{1, 10, 2, 5}, {1, 10, 2, 5}}, - // 1, 2, MKLDNNPlugin::impl_desc_type::unknown, {}, { - // [](MKLDNNPlugin::PrimitiveDescInfo impl) { - // ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType()); - // ASSERT_EQ(1, impl.getConfig().inConfs.size()); - // ASSERT_EQ(2, impl.getConfig().outConfs.size()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().inConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::ANY, impl.getConfig().outConfs.at(1).desc.getLayout()); - // }, - // [](MKLDNNPlugin::PrimitiveDescInfo impl) { - // ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - // ASSERT_EQ(1, impl.getConfig().inConfs.size()); - // ASSERT_EQ(2, impl.getConfig().outConfs.size()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - // ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(1).desc.getLayout()); - // } - // } - // }, - split_test_params { - {2, 24, 2, 5}, - {{2, 16, 2, 5}, {2, 8, 2, 5}}, - 1, 5, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - // TODO: rewrite to ngraph to have reshape functionality - // split_test_params { - // {1, 20, 2, 5}, - // {{1, 13, 2, 5}, {1, 7, 2, 5}}, - // 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - // }, - // TODO: rewrite to ngraph to have reshape functionality - // split_test_params { - // {1, 20, 2, 5}, - // {{1, 10, 2, 5}, {1, 10, 2, 5}}, - // 1, 2, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - // }, - split_test_params { - {2, 20, 2, 5}, - {{2, 10, 2, 5}, {2, 10, 2, 5}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {2, 20, 2, 5}, - {{2, 15, 2, 5}, {2, 5, 2, 5}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {3, 11, 7, 5}, - {{3, 11, 4, 5}, {3, 11, 3, 5}}, - 2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {3, 11, 7, 5}, - {{3, 11, 7, 1}, {3, 11, 7, 4}}, - 3, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {5, 6, 7, 15}, - {{5, 1, 7, 15}, {5, 2, 7, 15}, {5, 1, 7, 15}, {5, 2, 7, 15}}, - 1, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {5, 6, 7, 15}, - {{5, 6, 3, 15}, {5, 6, 1, 15}, {5, 6, 2, 15}, {5, 6, 1, 15}}, - 2, 3, MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref} - }, - split_test_params { - {5, 6, 7, 15}, - {{5, 6, 7, 5}, {5, 6, 7, 3}, {5, 6, 7, 4}, {5, 6, 7, 3}}, - 3, 3, 
MKLDNNPlugin::impl_desc_type::ref, {MKLDNNPlugin::impl_desc_type::ref}})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp deleted file mode 100644 index 6e5bafe8787..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include -#include - - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - - -struct tile_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in; - - size_t axis; - size_t tiles; - - size_t num_prim_desc; - - MKLDNNPlugin::impl_desc_type selectedType; - - std::vector> comp; -}; - - -template -void ref_tile(const InferenceEngine::TBlob &src, InferenceEngine::TBlob &dst_blob, tile_test_params prm) { - const float* m_src = src.readOnly(); - int m_outer_dim = 1; - int m_inner_dim = 1; - - for (int i=0; i < prm.axis; i++ ) - m_outer_dim *= src.getTensorDesc().getDims()[i]; - for (int i=prm.axis; i < src.getTensorDesc().getDims().size(); i++ ) - m_inner_dim *= src.getTensorDesc().getDims()[i]; - - float* dst = dst_blob.data(); - - for (int i = 0; i < m_outer_dim; ++i) { - for (int t = 0; t < prm.tiles; ++t) { - memcpy(dst, m_src, m_inner_dim* sizeof(float)); - dst += m_inner_dim; - } - m_src += m_inner_dim; - } -} - -class MKLDNNGraphTileTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _ON_ - _OC_ - _OH_ - _OW_ - - - - - - - - -)V0G0N"; - -protected: - std::string getModel(tile_test_params p) { - std::string model = model_t; - - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - - REPLACE_WITH_NUM(model, "_OW_", (p.axis == 3) ? p.in.w*p.tiles : p.in.w); - REPLACE_WITH_NUM(model, "_OH_", (p.axis == 2) ? p.in.h*p.tiles : p.in.h); - REPLACE_WITH_NUM(model, "_OC_", (p.axis == 1) ? p.in.c*p.tiles : p.in.c); - REPLACE_WITH_NUM(model, "_ON_", (p.axis == 0) ? 
p.in.n*p.tiles : p.in.n); - - REPLACE_WITH_NUM(model, "_AX_", p.axis); - REPLACE_WITH_NUM(model, "_TL_", p.tiles); - - return model; - } - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - tile_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - auto& nodes = graph.getNodes(); - for (int i = 0; i < nodes.size(); i++) { - if (nodes[i]->getType() == MKLDNNPlugin::Tile) { - ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size()); - for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) { - p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j)); - } - ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor()); - ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType()); - } - } - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - - ref_tile(*srcPtr, dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphTileTests, TestsTile) {} - - -INSTANTIATE_TEST_CASE_P( - TestsTile, MKLDNNGraphTileTests, - ::testing::Values( - tile_test_params{ - {1, 128, 1, 1}, 3, 24, 1, MKLDNNPlugin::impl_desc_type::unknown, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - }})); - -class MKLDNNGraphDynBatchTileTests: public MKLDNNGraphTileTests { -protected: - virtual void SetUp() { - try { - TestsCommon::SetUp(); - tile_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - size_t MB = p.in.n; - if (MB < 2) - MB = 2; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - ASSERT_EQ(nullptr, network.getFunction()); - auto implNet = static_cast(&((InferenceEngine::ICNNNetwork&)network)); - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); - ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; - - 
MKLDNNGraphTestClass graph; - graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {MB, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - auto checkTile = [](const MKLDNNPlugin::MKLDNNNodePtr& node) { - return node->getType() == MKLDNNPlugin::Tile; - }; - - graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkTile); - graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkTile); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDynBatchTileTests, TestsDynBatchTile) {} - - -// TODO: rewrite to ngraph to have reshape functionality -INSTANTIATE_TEST_CASE_P( - DISABLED_TestsDynBatchTile, MKLDNNGraphDynBatchTileTests, - ::testing::Values( - tile_test_params{ - {1, 128, 1, 1}, 3, 24, 1, MKLDNNPlugin::impl_desc_type::unknown, { - [](MKLDNNPlugin::PrimitiveDescInfo impl) { - ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType()); - ASSERT_EQ(1, impl.getConfig().inConfs.size()); - ASSERT_EQ(1, impl.getConfig().outConfs.size()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout()); - } - }})); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_concat_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_concat_tests.cpp deleted file mode 100644 index a62cb9cf0b8..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_concat_tests.cpp +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include "ir_gen_helper.hpp" -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; -using namespace single_layer_tests; -using namespace InferenceEngine; - -struct concat_params { - size_t axis; -}; - -struct conv_concat_params { - // Formats: NCHW, NCDHW - std::vector in; - - CommonTestUtils::conv_common_params conv; - concat_params concat; - - std::vector preferTypes; -}; - -class MKLDNNConvConcatTests: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - __INP_DIMS__ - - - - - _IN_ - _OC_ - __CONV_OUT_DIMS__ - - - - - - - - - - - _IN_ - _OC_ - __CONV_OUT_DIMS__ - - - __INP_DIMS__ - - - - - __CONCAT_OUT_DIMS__ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - - - -)V0G0N"; - - std::string getModel(conv_concat_params p) { - std::string model = layers_t; - - std::string s_dims; - for (auto& dim : p.in) { 
- s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__INP_DIMS__", s_dims); - - s_dims = ""; - size_t conv_axis_val = p.in[p.concat.axis]; - int k_len = p.conv.kernel.size(); - for (size_t i = 2lu; i < p.in.size(); i++) { - size_t inx = k_len - i + 1; - size_t dim = (p.in[i] + 2lu * p.conv.pads_begin[inx] - p.conv.kernel[inx]) / p.conv.stride[inx] + 1lu; - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - if (i == p.concat.axis) { - conv_axis_val = dim; - } - } - REPLACE_WITH_STR(model, "__CONV_OUT_DIMS__", s_dims); - - s_dims = ""; - for (size_t i = 0lu; i < p.in.size(); i++) { - size_t val = p.in[i]; - if (i == p.concat.axis) { - val += conv_axis_val; - } - s_dims += "\n "; - s_dims += std::to_string(val) + ""; - } - REPLACE_WITH_STR(model, "__CONCAT_OUT_DIMS__", s_dims); - - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.conv.kernel); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.conv.stride); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.conv.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.conv.pads_end); - REPLACE_WITH_NUM(model, "_GC_", p.conv.group); - REPLACE_WITH_NUM(model, "_OC_", p.conv.out_c); - REPLACE_WITH_NUM(model, "_IN_", p.in[0]); - REPLACE_WITH_NUM(model, "__AXIS__", p.concat.axis); - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - - model = IRTemplateGenerator::getIRTemplate("convolution_Concat", p.in, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - conv_concat_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - size_t blob_size = p.conv.out_c * p.in[1] / p.conv.group; - for (size_t i = 0; i < p.conv.kernel.size(); i++) { - blob_size *= p.conv.kernel[i]; - } - blob_size = (blob_size + p.conv.out_c); - InferenceEngine::SizeVector dims_weights = { blob_size }; - - std::vector blob_to_model; - InferenceEngine::Blob::Ptr weights = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, dims_weights, InferenceEngine::C }); - weights->allocate(); - fill_data(weights->buffer().as(), weights->size()); - blob_to_model.push_back(weights); - - InferenceEngine::Blob::Ptr bias = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, {p.conv.out_c}, InferenceEngine::C }); - bias->allocate(); - fill_data(bias->buffer().as(), bias->size()); - blob_to_model.push_back(bias); - - size_t total_size_in_bytes = 0; - for (InferenceEngine::Blob::Ptr blb : blob_to_model) total_size_in_bytes += blb->byteSize(); - - InferenceEngine::TBlob::Ptr model_blob = - InferenceEngine::make_shared_blob({ InferenceEngine::Precision::U8, {total_size_in_bytes}, InferenceEngine::C }); - model_blob->allocate(); - uint8_t* model_blob_ptr = model_blob->buffer().as(); - for (InferenceEngine::Blob::Ptr blb : blob_to_model) { - memcpy(model_blob_ptr, blb->buffer().as(), blb->byteSize()); - model_blob_ptr += blb->byteSize(); - } - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, model_blob)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = p.in; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob( - {InferenceEngine::Precision::FP32, 
dims_src, InferenceEngine::TensorDesc::getLayoutByDims(p.in)}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - details::CNNNetworkIterator l(network), end; - for ( ; l != end; ++l) { - (*l)->params["PrimitivesPriority"] = "cpu:ref,cpu:ref_any"; - } - MKLDNNGraphTestClass graph2; - graph2.CreateGraph(network); - - InferenceEngine::BlobMap outputBlobs2; - - InferenceEngine::TBlob::Ptr output2; - output2 = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output2->allocate(); - outputBlobs2[item.first] = output2; - - graph.Infer(srcs, outputBlobs2); - - compare(*output, *output2, 0.0005f); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNConvConcatTests, TestsConvConcat) {} - -INSTANTIATE_TEST_CASE_P( - TestsConvConcat, MKLDNNConvConcatTests, - ::testing::Values( - conv_concat_params{{1, 256, 4, 4}, - { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 256, false }, - {1}}, - conv_concat_params{{2, 256, 4, 4}, - { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 256, false }, - {1}}, - conv_concat_params{{1, 256, 4, 4, 4}, - { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 256, false }, - {1}}, - conv_concat_params{{2, 256, 4, 4, 4}, - { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 256, false }, - {1}}, - conv_concat_params{{1, 256, 4, 4}, - { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 256, false }, - {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}}, - conv_concat_params{{2, 256, 4, 4}, - { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 256, false }, - {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}}, - conv_concat_params{{1, 256, 4, 4, 4}, - { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 256, false }, - {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}}, - conv_concat_params{{2, 256, 4, 4, 4}, - { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 256, false }, - {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}} - )); - diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp deleted file mode 100644 index 19b7e52d039..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp +++ /dev/null @@ -1,337 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "common_test_utils/data_utils.hpp" -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -constexpr auto depthwise_scale_shift = mkldnn::algorithm::depthwise_scale_shift; -constexpr auto depthwise_prelu = mkldnn::algorithm::depthwise_prelu; - -struct conv_params { - size_t krn_w; - size_t krn_h; - size_t str_w; - size_t str_h; - size_t 
pad_w; - size_t pad_h; - size_t out_c; - size_t grp_c; -}; - -struct conv_depthwise_fusing_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in; - - conv_params conv; - algorithm depthwise_alg; - bool isBroadcast; -}; - -template -void ref_conv_depthwise(const InferenceEngine::TBlob &src, const data_t *weights, - InferenceEngine::TBlob &dst, conv_depthwise_fusing_test_params& prm) { - size_t KW = prm.conv.krn_w; - size_t KH = prm.conv.krn_h; - size_t GC = prm.conv.grp_c; - - size_t IC = src.getTensorDesc().getDims()[1]; - size_t IH = src.getTensorDesc().getDims()[2]; - size_t IW = src.getTensorDesc().getDims()[3]; - - size_t OW = (IW + 2 * prm.conv.pad_w - prm.conv.krn_w) / prm.conv.str_w + 1; - size_t OH = (IH + 2 * prm.conv.pad_h - prm.conv.krn_h) / prm.conv.str_h + 1; - size_t OC = prm.conv.out_c; - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights; - const data_t *bias_data = weights_data + KW * KH * OC * IC / GC; - data_t *dst_data = dst.data(); - - const data_t *d_weights_data = bias_data + OC; - const data_t *d_bias_data = (prm.isBroadcast) ? d_weights_data + 1 : d_weights_data + OC; - - for (uint32_t g = 0; g < GC; g++) { - for (uint32_t oc = 0; oc < OC / GC; oc++) { - for (uint32_t oh = 0; oh < OH; oh++) { - for (uint32_t ow = 0; ow < OW; ow++) { - size_t bidx = g * OC / GC + oc; - size_t oidx = g * OC / GC * OH * OW - + oc * OH * OW + oh * OW + ow; - dst_data[oidx] = bias_data[bidx]; - - for (size_t ic = 0; ic < IC / GC; ic++) { - for (size_t kh = 0; kh < KH; kh++) { - for (size_t kw = 0; kw < KW; kw++) { - int32_t iw = ow * prm.conv.str_w - prm.conv.pad_w + kw; - int32_t ih = oh * prm.conv.str_h - prm.conv.pad_h + kh; - if (iw < 0 || iw >= (int32_t)IW || ih < 0 - || ih >= (int32_t)IH) - continue; - size_t iidx = g * IC / GC * IH * IW - + ic * IH * IW + ih * IW + iw; - size_t widx = g * OC / GC * IC / GC * KH * KW - + oc * IC / GC * KH * KW - + ic * KH * KW + kh * KW + kw; - - dst_data[oidx] += src_data[iidx] * weights_data[widx]; - } - } - } - - - switch(prm.depthwise_alg) { - case depthwise_scale_shift: - dst_data[oidx] = d_weights_data[prm.isBroadcast ? 0 : bidx] * dst_data[oidx] + d_bias_data[prm.isBroadcast ? 0 : bidx]; - break; - case depthwise_prelu: - dst_data[oidx] = dst_data[oidx] >= 0 ? dst_data[oidx] : d_weights_data[prm.isBroadcast ? 
0 : bidx] * dst_data[oidx]; - break; - default: - assert("Unsupported depthwise algorithm"); - } - } - } - } - } -} - -class MKLDNNGraphConvDepthwiseFusingTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _C_OC_ - _C_OH_ - _C_OW_ - - - - - - - - - - - _IN_ - _C_OC_ - _C_OH_ - _C_OW_ - - - - - _IN_ - _C_OC_ - _C_OH_ - _C_OW_ - - - - - - - - - -)V0G0N"; - - std::string getModel(conv_depthwise_fusing_test_params p) { - std::string model = model_t; - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - - REPLACE_WITH_NUM(model, "_C_KW_", p.conv.krn_w); - REPLACE_WITH_NUM(model, "_C_KH_", p.conv.krn_h); - REPLACE_WITH_NUM(model, "_C_SW_", p.conv.str_w); - REPLACE_WITH_NUM(model, "_C_SH_", p.conv.str_h); - REPLACE_WITH_NUM(model, "_C_PW_", p.conv.pad_w); - REPLACE_WITH_NUM(model, "_C_PH_", p.conv.pad_h); - REPLACE_WITH_NUM(model, "_C_GC_", p.conv.grp_c); - REPLACE_WITH_NUM(model, "_C_OC_", p.conv.out_c); - size_t c_oh = (p.in.h + 2 * p.conv.pad_h - p.conv.krn_h) / p.conv.str_h + 1; - size_t c_ow = (p.in.w + 2 * p.conv.pad_w - p.conv.krn_w) / p.conv.str_w + 1; - REPLACE_WITH_NUM(model, "_C_OH_", c_oh); - REPLACE_WITH_NUM(model, "_C_OW_", c_ow); - - size_t conv_w_data_size = (p.conv.krn_w * p.conv.krn_h * p.conv.out_c * p.in.c / p.conv.grp_c) * sizeof(float); - size_t conv_b_data_size = p.conv.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_C_S1_", conv_w_data_size); - REPLACE_WITH_NUM(model, "_C_S2_", conv_b_data_size); - - if (p.depthwise_alg == depthwise_scale_shift) { - REPLACE_WITH_STR(model, "_LT_", "ScaleShift"); - REPLACE_WITH_STR(model, "_P_NAME_", "broadcast"); - REPLACE_WITH_NUM(model, "_P_VAL_", p.isBroadcast ? 1 : 0); - - } - else if (p.depthwise_alg == depthwise_prelu) { - REPLACE_WITH_STR(model, "_LT_", "PReLU"); - REPLACE_WITH_STR(model, "_P_NAME_", "channel_shared"); - REPLACE_WITH_NUM(model, "_P_VAL_", p.isBroadcast ? 1 : 0); - } - - size_t array_size = p.isBroadcast ? 1 : p.conv.out_c; - size_t depthwise_w_data_size = array_size * sizeof(float); - size_t depthwise_b_data_size = array_size * sizeof(float); - REPLACE_WITH_NUM(model, "_D_S0_", conv_w_data_size + conv_b_data_size); - REPLACE_WITH_NUM(model, "_D_S1_", depthwise_w_data_size); - REPLACE_WITH_NUM(model, "_D_S2_", conv_w_data_size + conv_b_data_size + depthwise_w_data_size); - REPLACE_WITH_NUM(model, "_D_S3_", depthwise_b_data_size); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - conv_depthwise_fusing_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - size_t conv_w_size = p.conv.krn_w * p.conv.krn_h * p.conv.out_c * p.in.c / p.conv.grp_c + p.conv.out_c; // conv weights + biases - - size_t array_size = p.isBroadcast ? 
1 : p.conv.out_c; - size_t depthwise_w_size = array_size + array_size; // depthwise weights + biases - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {(conv_w_size+depthwise_w_size) * sizeof(float)}, InferenceEngine::C }); - weights->allocate(); - CommonTestUtils::fill_data_sine((float *) weights->buffer(), weights->size() / sizeof(float), 5, 10, 0.5); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - auto& nodes = graph.getNodes(); - nodes = graph.getNodes(); - if (p.in.c == 3) { - ASSERT_EQ(nodes.size(), 3); - ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input); - ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution); - ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise)); - ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output); - } else { - ASSERT_EQ(nodes.size(), 5); - ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input); - ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution); - ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise)); - ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output); - } - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - size_t c1_oh = (p.in.h + 2 * p.conv.pad_h - p.conv.krn_h) / p.conv.str_h + 1; - size_t c1_ow = (p.in.w + 2 * p.conv.pad_w - p.conv.krn_w) / p.conv.str_w + 1; - InferenceEngine::TBlob dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {c1_ow, c1_oh, p.conv.out_c, p.in.n}, InferenceEngine::NCHW)); - dst_ref.allocate(); - - ref_conv_depthwise(*srcPtr, (const float *)weights->buffer(), dst_ref, p); - - compare(*output, dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphConvDepthwiseFusingTests, TestsConvDepthwiseFusing) {} - -INSTANTIATE_TEST_CASE_P( - TestsConvDepthwiseFusing, MKLDNNGraphConvDepthwiseFusingTests, - ::testing::Values( - conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_scale_shift, false}, - conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_prelu, false}, - conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_scale_shift, true}, - conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_prelu, true}, - 
conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_scale_shift, false}, - conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_prelu, false}, - conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_scale_shift, true}, - conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_prelu, true}, - conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_scale_shift, false}, - conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_prelu, false}, - conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_scale_shift, true}, - conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_prelu, true}, - conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_scale_shift, false}, - conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_prelu, false}, - conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_scale_shift, true}, - conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_prelu, true} - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tests.cpp deleted file mode 100644 index 90892d60289..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tests.cpp +++ /dev/null @@ -1,397 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include "ir_gen_helper.hpp" -#include -#include "common_test_utils/common_layers_params.hpp" -#include "common_test_utils/common_utils.hpp" - -using namespace ::testing; -using namespace std; -using namespace mkldnn; -using namespace single_layer_tests; - -struct concat_params { - size_t axis; -}; - -struct deconv_concat_params { - // Formats: NCHW, NCDHW - std::vector in; - - CommonTestUtils::conv_common_params deconv; - concat_params concat; - - std::vector preferTypes; -}; - -void ref_deconv_common(const InferenceEngine::Blob &src, - InferenceEngine::Blob &dst, - const float *weights_data, - size_t weights_size, - const float *bias_data, - size_t bias_size, - const CommonTestUtils::conv_common_params &prm) { - auto dims_size = src.getTensorDesc().getDims().size(); - - size_t G = prm.group; - size_t KW = prm.kernel[InferenceEngine::X_AXIS]; - size_t KH = prm.kernel[InferenceEngine::Y_AXIS]; - size_t KD = prm.kernel.size() > InferenceEngine::Z_AXIS ? prm.kernel[InferenceEngine::Z_AXIS] : 1u; - - size_t PW = prm.pads_begin[InferenceEngine::X_AXIS]; - size_t PH = prm.pads_begin[InferenceEngine::Y_AXIS]; - size_t PD = prm.pads_begin.size() > InferenceEngine::Z_AXIS ? prm.pads_begin[InferenceEngine::Z_AXIS] : 0u; - - size_t SW = prm.stride[InferenceEngine::X_AXIS]; - size_t SH = prm.stride[InferenceEngine::Y_AXIS]; - size_t SD = prm.stride.size() > InferenceEngine::Z_AXIS ? prm.stride[InferenceEngine::Z_AXIS] : 1u; - - size_t IW = src.getTensorDesc().getDims()[dims_size - 1]; - size_t IH = src.getTensorDesc().getDims()[dims_size - 2]; - size_t ID = dims_size == 5 ? 
src.getTensorDesc().getDims()[dims_size - 3] : 1u; - size_t IC = src.getTensorDesc().getDims()[1]; - size_t MB = src.getTensorDesc().getDims()[0]; - - size_t OC = prm.out_c; - - size_t OW = SW * (IW - 1lu) + KW - 2lu * PW; - size_t OH = SH * (IH - 1lu) + KH - 2lu * PH; - size_t OD = dims_size == 5 ? (SD * (ID - 1) + KD - 2 * PD) : 1u; - - const float *src_data = src.cbuffer().as(); - float *dst_data = dst.buffer().as(); - - size_t CS1 = OH * OW; - size_t CS2 = CS1 * OD; - size_t CS3 = CS2 * OC; - - size_t CI1 = IH * IW; - size_t CI2 = CI1 * ID; - size_t CI3 = CI2 * IC; - - size_t OC_G = OC / G; - size_t IC_G = IC / G; - - size_t CK1 = KH * KW; - size_t CK2 = CK1 * KD; - size_t CK3 = CK2 * OC_G; - size_t CK4 = CK3 * IC_G; - - for (size_t g = 0lu; g < G; ++g) { - size_t g_OC_G = g * OC_G; - size_t g_IC_G = g * IC_G; - size_t g_CK4 = g * CK4; - for (size_t mb = 0lu; mb < MB; ++mb) { - size_t mb_CS3 = mb * CS3; - size_t mb_CI3 = mb * CI3; - for (size_t oc = 0lu; oc < OC_G; ++oc) { - size_t g_OC_G_oc = g_OC_G + oc; - size_t mb_CS3_g_OC_G_oc_CS2 = mb_CS3 + g_OC_G_oc * CS2; - size_t g_CK4_oc_CK2 = g_CK4 + oc * CK2; - for (size_t od = 0lu; od < OD; ++od) { - size_t mb_CS3_g_OC_G_oc_CS2_od_CS1 = mb_CS3_g_OC_G_oc_CS2 + od * CS1; - size_t od_PD = od + PD; - for (size_t oh = 0lu; oh < OH; ++oh) { - size_t mb_CS3_g_OC_G_oc_CS2_od_CS1_oh_OW = mb_CS3_g_OC_G_oc_CS2_od_CS1 + oh * OW; - size_t oh_PH = oh + PH; - for (size_t ow = 0lu; ow < OW; ++ow) { - size_t didx = mb_CS3_g_OC_G_oc_CS2_od_CS1_oh_OW + ow; - size_t ow_PW = ow + PW; - - dst_data[didx] = float(0); - if (prm.with_bias) dst_data[didx] += bias_data[g_OC_G_oc]; - - for (size_t ic = 0lu; ic < IC_G; ic++) { - size_t mb_CI3_g_IC_G_ic_CI2 = mb_CI3 + (g_IC_G + ic) * CI2; - size_t g_CK4_oc_CK2_ic_CK3 = g_CK4_oc_CK2 + ic * CK3; - for (int kd = 0lu; kd < KD; kd++) { - if (od_PD < kd) continue; - size_t id = od_PD - kd; - if (id % SD != 0) continue; - id /= SD; - if (id >= ID) continue; - size_t mb_CI3_g_IC_G_ic_CI2_id_CI1 = mb_CI3_g_IC_G_ic_CI2 + id * CI1; - size_t g_CK4_oc_CK2_ic_CK3_kd_CK1 = g_CK4_oc_CK2_ic_CK3 + kd * CK1; - for (size_t kh = 0lu; kh < KH; kh++) { - if (oh_PH < kh) continue; - size_t ih = oh_PH - kh; - if (ih % SH != 0) continue; - ih /= SH; - if (ih >= IH) continue; - size_t mb_CI3_g_IC_G_ic_CI2_id_CI1_ih_IW = mb_CI3_g_IC_G_ic_CI2_id_CI1 + ih * IW; - size_t g_CK4_oc_CK2_ic_CK3_kd_CK1_kh_KW = g_CK4_oc_CK2_ic_CK3_kd_CK1 + kh * KW; - for (size_t kw = 0lu; kw < KW; kw++) { - if (ow_PW < kw) continue; - size_t iw = ow_PW - kw; - if (iw % SW != 0) continue; - iw /= SW; - if (iw >= IW) continue; - - size_t sidx = mb_CI3_g_IC_G_ic_CI2_id_CI1_ih_IW + iw; - - size_t widx = g_CK4_oc_CK2_ic_CK3_kd_CK1_kh_KW + kw; - - dst_data[didx] += src_data[sidx] * weights_data[widx]; - } - } - } - } - } - } - } - } - } - } -} - -class MKLDNNDeconvConcatTests: public TestsCommon, - public WithParamInterface { - std::string layers_t = R"V0G0N( - - - - - __INP_DIMS__ - - - - - _IN_ - _OC_ - __DECONV_OUT_DIMS__ - - - - - - - - - - - _IN_ - _OC_ - __DECONV_OUT_DIMS__ - - - __INP_DIMS__ - - - - - __CONCAT_OUT_DIMS__ - - - -)V0G0N"; - - std::string edges_t = R"V0G0N( - - - -)V0G0N"; - - std::string getModel(deconv_concat_params p) { - std::string model = layers_t; - - std::string s_dims; - for (auto& dim : p.in) { - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - } - REPLACE_WITH_STR(model, "__INP_DIMS__", s_dims); - - s_dims = ""; - size_t deconv_axis_val = p.in[p.concat.axis]; - int k_len = p.deconv.kernel.size(); - for (size_t i = 2lu; i < p.in.size(); 
i++) { - size_t inx = k_len - i + 1; - size_t dim = p.deconv.stride[inx] * (p.in[i] - 1) + p.deconv.kernel[inx] - 2 * p.deconv.pads_begin[inx]; - s_dims += "\n "; - s_dims += std::to_string(dim) + ""; - if (i == p.concat.axis) { - deconv_axis_val = dim; - } - } - REPLACE_WITH_STR(model, "__DECONV_OUT_DIMS__", s_dims); - - s_dims = ""; - for (size_t i = 0lu; i < p.in.size(); i++) { - size_t val = p.in[i]; - if (i == p.concat.axis) { - val += deconv_axis_val; - } - s_dims += "\n "; - s_dims += std::to_string(val) + ""; - } - REPLACE_WITH_STR(model, "__CONCAT_OUT_DIMS__", s_dims); - - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.deconv.kernel); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.deconv.stride); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.deconv.pads_begin); - REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.deconv.pads_end); - REPLACE_WITH_NUM(model, "_GC_", p.deconv.group); - REPLACE_WITH_NUM(model, "_OC_", p.deconv.out_c); - REPLACE_WITH_NUM(model, "_IN_", p.in[0]); - REPLACE_WITH_NUM(model, "__AXIS__", p.concat.axis); - - std::string impls; - for (const auto& preferType : p.preferTypes) { - if (!impls.empty()) - impls += ","; - impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType); - } - REPLACE_WITH_STR(model, "_IMPLS_", impls); - - model = IRTemplateGenerator::getIRTemplate("Deconvolution_Concat", p.in, "FP32", model, edges_t); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - deconv_concat_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - size_t blob_size = p.deconv.out_c * (p.in[1] / p.deconv.group); - for (int i = 0 ; i < p.deconv.kernel.size(); i++) { - blob_size *= p.deconv.kernel[i]; - } - InferenceEngine::SizeVector dims_weights = { blob_size }; - - std::vector blob_to_model; - InferenceEngine::Blob::Ptr weights = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, dims_weights, InferenceEngine::C }); - weights->allocate(); - fill_data(weights->buffer().as(), weights->size()); - blob_to_model.push_back(weights); - - InferenceEngine::Blob::Ptr bias = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, {p.deconv.out_c}, InferenceEngine::C }); - bias->allocate(); - fill_data(bias->buffer().as(), bias->size()); - blob_to_model.push_back(bias); - - size_t total_size_in_bytes = 0; - for (InferenceEngine::Blob::Ptr blb : blob_to_model) total_size_in_bytes += blb->byteSize(); - - InferenceEngine::TBlob::Ptr model_blob = - InferenceEngine::make_shared_blob({ InferenceEngine::Precision::U8, {total_size_in_bytes}, InferenceEngine::C }); - model_blob->allocate(); - uint8_t* model_blob_ptr = model_blob->buffer().as(); - for (InferenceEngine::Blob::Ptr blb : blob_to_model) { - memcpy(model_blob_ptr, blb->buffer().as(), blb->byteSize()); - model_blob_ptr += blb->byteSize(); - } - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, model_blob)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = p.in; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob( - {InferenceEngine::Precision::FP32, dims_src, InferenceEngine::TensorDesc::getLayoutByDims(p.in)}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::TBlob* srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - 
InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - // Compare with reference - - auto deconv = CommonTestUtils::getLayerByName(network, "Deconvolution_1"); - InferenceEngine::TBlob deconv_ref(deconv->outData[0]->getTensorDesc()); - deconv_ref.allocate(); - - ref_deconv_common(*srcPtr, deconv_ref, weights->buffer().as(), weights->size(), - bias->buffer().as(), bias->size(), p.deconv); - - float *src1_ptr = deconv_ref.buffer(); - size_t src1_size = deconv_ref.size(); - float *src2_ptr = src->buffer(); - size_t src2_size = src->size(); - float *dst_ptr = output->buffer(); - size_t dst_size = output->size(); - - int len1 = 1, len2 = 1; - for (int dim = p.concat.axis; dim < output->getTensorDesc().getDims().size(); dim++) { - len1 *= deconv->outData[0]->getTensorDesc().getDims()[dim]; - len2 *= src->getTensorDesc().getDims()[dim]; - } - - size_t index1 = 0, index2 = 0, index = 0; - float max_diff = 0.0001f; - for (size_t cycle = 0lu; cycle < p.concat.axis; cycle ++) { - for (int i1 = 0; i1 < len1; i1++) { - if (fabs(src1_ptr[index1] - dst_ptr[index]) > max_diff) - { - FAIL() << "index: " << index << " src: " << src1_ptr[index1] << ", dst: " << dst_ptr[index]; - } - index1++; index++; - } - for (int i2 = 0; i2 < len2; i2++) { - if (fabs(src2_ptr[index2] - dst_ptr[index]) > max_diff) - { - FAIL() << "index: " << index << " src: " << src2_ptr[index2] << ", dst: " << dst_ptr[index]; - } - index2++; index++; - } - } - - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNDeconvConcatTests, TestsDwConvFusing) {} - -INSTANTIATE_TEST_CASE_P( - TestsDwConvFusing, MKLDNNDeconvConcatTests, - ::testing::Values( - deconv_concat_params{{1, 256, 4, 4}, - { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 256, false }, - {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}}, - deconv_concat_params{{2, 256, 4, 4}, - { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 256, false }, - {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}}, - deconv_concat_params{{1, 256, 4, 4, 4}, - { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 256, false }, - {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}}, - deconv_concat_params{{2, 256, 4, 4, 4}, - { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 256, false }, - {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}} - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp deleted file mode 100644 index ff3706c1d6c..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp +++ /dev/null @@ -1,334 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_graph.hpp" - -#include "single_layer_common.hpp" -#include "tests_common.hpp" -#include -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -struct conv_params { - size_t krn_w; - size_t krn_h; - size_t str_w; - size_t str_h; - size_t pad_w; - size_t pad_h; - size_t out_c; - size_t grp_c; 
-}; - -struct dw_conv_fusing_test_params { - struct { - size_t n; - size_t c; - size_t h; - size_t w; - } in; - - conv_params conv1; - conv_params conv2; -}; - -template -void ref_conv(const InferenceEngine::TBlob &src, const data_t *weights, const size_t weightsSize, - InferenceEngine::TBlob &dst, conv_params prm, float negative_slope) { - size_t KW = prm.krn_w; - size_t KH = prm.krn_h; - size_t GC = prm.grp_c; - - size_t IC = src.getTensorDesc().getDims()[1]; - size_t IH = src.getTensorDesc().getDims()[2]; - size_t IW = src.getTensorDesc().getDims()[3]; - - size_t OW = (IW + 2 * prm.pad_w - prm.krn_w) / prm.str_w + 1; - size_t OH = (IH + 2 * prm.pad_h - prm.krn_h) / prm.str_h + 1; - size_t OC = prm.out_c; - - const data_t *src_data = src.readOnly(); - const data_t *weights_data = weights; - const data_t *bias_data = weights_data + KW * KH * OC * IC / GC; - data_t *dst_data = dst.data(); - - IE_ASSERT(KW * KH * OC * IC / GC + OC == weightsSize); - - for (uint32_t g = 0; g < GC; g++) { - for (uint32_t oc = 0; oc < OC / GC; oc++) { - for (uint32_t oh = 0; oh < OH; oh++) { - for (uint32_t ow = 0; ow < OW; ow++) { - size_t oidx = g * OC / GC * OH * OW - + oc * OH * OW + oh * OW + ow; - dst_data[oidx] = bias_data[g * OC / GC + oc]; - - for (size_t ic = 0; ic < IC / GC; ic++) { - for (size_t kh = 0; kh < KH; kh++) { - for (size_t kw = 0; kw < KW; kw++) { - int32_t iw = ow * prm.str_w - prm.pad_w + kw; - int32_t ih = oh * prm.str_h - prm.pad_h + kh; - if (iw < 0 || iw >= (int32_t)IW || ih < 0 - || ih >= (int32_t)IH) - continue; - size_t iidx = g * IC / GC * IH * IW - + ic * IH * IW + ih * IW + iw; - size_t widx = g * OC / GC * IC / GC * KH * KW - + oc * IC / GC * KH * KW - + ic * KH * KW + kh * KW + kw; - - dst_data[oidx] += src_data[iidx] * weights_data[widx]; - } - } - } - - if (dst_data[oidx] < 0) - dst_data[oidx] *= negative_slope; - } - } - } - } -} - -class MKLDNNGraphDWConvFusingTests: public TestsCommon, - public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - - - - - - - _IN_ - _IC_ - _IH_ - _IW_ - - - - - _IN_ - _C1_OC_ - _C1_OH_ - _C1_OW_ - - - - - - - - _IN_ - _C1_OC_ - _C1_OH_ - _C1_OW_ - - - - - _IN_ - _C1_OC_ - _C1_OH_ - _C1_OW_ - - - - - - - - - - - _IN_ - _C1_OC_ - _C1_OH_ - _C1_OW_ - - - - - _IN_ - _C2_OC_ - _C2_OH_ - _C2_OW_ - - - - - - - - _IN_ - _C2_OC_ - _C2_OH_ - _C2_OW_ - - - - - _IN_ - _C2_OC_ - _C2_OH_ - _C2_OW_ - - - - - - - - - - - -)V0G0N"; - - std::string getModel(dw_conv_fusing_test_params p) { - std::string model = model_t; - REPLACE_WITH_NUM(model, "_IW_", p.in.w); - REPLACE_WITH_NUM(model, "_IH_", p.in.h); - REPLACE_WITH_NUM(model, "_IC_", p.in.c); - REPLACE_WITH_NUM(model, "_IN_", p.in.n); - - REPLACE_WITH_NUM(model, "_C1_KW_", p.conv1.krn_w); - REPLACE_WITH_NUM(model, "_C1_KH_", p.conv1.krn_h); - REPLACE_WITH_NUM(model, "_C1_SW_", p.conv1.str_w); - REPLACE_WITH_NUM(model, "_C1_SH_", p.conv1.str_h); - REPLACE_WITH_NUM(model, "_C1_PW_", p.conv1.pad_w); - REPLACE_WITH_NUM(model, "_C1_PH_", p.conv1.pad_h); - REPLACE_WITH_NUM(model, "_C1_GC_", p.conv1.grp_c); - REPLACE_WITH_NUM(model, "_C1_OC_", p.conv1.out_c); - size_t c1_oh = (p.in.h + 2 * p.conv1.pad_h - p.conv1.krn_h) / p.conv1.str_h + 1; - size_t c1_ow = (p.in.w + 2 * p.conv1.pad_w - p.conv1.krn_w) / p.conv1.str_w + 1; - REPLACE_WITH_NUM(model, "_C1_OH_", c1_oh); - REPLACE_WITH_NUM(model, "_C1_OW_", c1_ow); - - size_t conv1_w_data_size = (p.conv1.krn_w * p.conv1.krn_h * p.conv1.out_c * p.in.c / p.conv1.grp_c) * sizeof(float); - size_t conv1_b_data_size = 
p.conv1.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_C1_S1_", conv1_w_data_size); - REPLACE_WITH_NUM(model, "_C1_S2_", conv1_b_data_size); - - REPLACE_WITH_NUM(model, "_C2_KW_", p.conv2.krn_w); - REPLACE_WITH_NUM(model, "_C2_KH_", p.conv2.krn_h); - REPLACE_WITH_NUM(model, "_C2_SW_", p.conv2.str_w); - REPLACE_WITH_NUM(model, "_C2_SH_", p.conv2.str_h); - REPLACE_WITH_NUM(model, "_C2_PW_", p.conv2.pad_w); - REPLACE_WITH_NUM(model, "_C2_PH_", p.conv2.pad_h); - REPLACE_WITH_NUM(model, "_C2_GC_", p.conv2.grp_c); - REPLACE_WITH_NUM(model, "_C2_OC_", p.conv2.out_c); - REPLACE_WITH_NUM(model, "_C2_OH_", (c1_oh + 2 * p.conv2.pad_h - p.conv2.krn_h) / p.conv2.str_h + 1); - REPLACE_WITH_NUM(model, "_C2_OW_", (c1_ow + 2 * p.conv2.pad_w - p.conv2.krn_w) / p.conv2.str_w + 1); - - size_t conv2_w_data_size = (p.conv2.krn_w * p.conv2.krn_h * p.conv2.out_c * p.conv1.out_c / p.conv2.grp_c) * sizeof(float); - size_t conv2_b_data_size = p.conv2.out_c * sizeof(float); - REPLACE_WITH_NUM(model, "_C2_S0_", conv1_w_data_size + conv1_b_data_size); - REPLACE_WITH_NUM(model, "_C2_S1_", conv2_w_data_size); - REPLACE_WITH_NUM(model, "_C2_S2_", conv1_w_data_size + conv1_b_data_size + conv2_w_data_size); - REPLACE_WITH_NUM(model, "_C2_S3_", conv2_b_data_size); - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - TestsCommon::SetUp(); - dw_conv_fusing_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - size_t conv1_w_size = p.conv1.krn_w * p.conv1.krn_h * p.conv1.out_c * p.in.c / p.conv1.grp_c + p.conv1.out_c; // conv1 weights + biases - size_t conv2_w_size = p.conv2.krn_w * p.conv2.krn_h * p.conv2.out_c * p.conv1.out_c / p.conv2.grp_c + p.conv2.out_c; // conv2 weights + biases - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, - {(conv1_w_size+conv2_w_size) * sizeof(float)}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float), 1); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w}; - - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, dims_src, InferenceEngine::NCHW}); - src->allocate(); - fill_data(src->buffer(), src->size()); - - auto * srcPtr = dynamic_cast*>(src.get()); - - if (srcPtr == nullptr) - FAIL() << "Cannot cast blob to TBlob."; - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("in1", src)); - - InferenceEngine::OutputsDataMap out; - out = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs; - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - size_t c1_oh = (p.in.h + 2 * p.conv1.pad_h - p.conv1.krn_h) / p.conv1.str_h + 1; - size_t c1_ow = (p.in.w + 2 * p.conv1.pad_w - p.conv1.krn_w) / p.conv1.str_w + 1; - InferenceEngine::TBlob conv1_dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {p.in.n, p.conv1.out_c, c1_oh, c1_ow}, InferenceEngine::NCHW)); - conv1_dst_ref.allocate(); - - size_t c2_oh = (c1_oh + 2 * 
p.conv2.pad_h - p.conv2.krn_h) / p.conv2.str_h + 1; - size_t c2_ow = (c1_ow + 2 * p.conv2.pad_w - p.conv2.krn_w) / p.conv2.str_w + 1; - InferenceEngine::TBlob conv2_dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {p.in.n, p.conv2.out_c, c2_oh, c2_ow}, InferenceEngine::NCHW)); - conv2_dst_ref.allocate(); - - ref_conv(*srcPtr, (const float *)weights->buffer(), conv1_w_size, conv1_dst_ref, p.conv1, 0.0f); - ref_conv(conv1_dst_ref, (const float *)weights->buffer() + conv1_w_size, conv2_w_size, conv2_dst_ref, p.conv2, 0.0f); - - - compare(*output, conv2_dst_ref); - } catch (const InferenceEngine::Exception &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(MKLDNNGraphDWConvFusingTests, TestsDwConvFusing) {} - -INSTANTIATE_TEST_CASE_P( - TestsDwConvFusing, MKLDNNGraphDWConvFusingTests, - ::testing::Values( - dw_conv_fusing_test_params{{1, 32, 160, 320}, {1, 1, 1, 1, 0, 0, 24, 1}, {3, 3, 1, 1, 1, 1, 24, 24}} - )); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp deleted file mode 100644 index bd8b05458f5..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp +++ /dev/null @@ -1,421 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "../test_graph.hpp" - -#include "single_layer_common.hpp" -#include -#include "tests_common.hpp" -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -class MKLDNNGraphOptimizationTests: public TestsCommon {}; - -TEST_F(MKLDNNGraphOptimizationTests, TestNoFuseConvSumWithOneInput) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 5 - 5 - - - - - - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - - - 1 - 3 - 5 - 5 - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {48}, InferenceEngine::C }); - weights->allocate(); - float * data = weights->buffer(); - - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core ie; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = ie.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - ASSERT_NO_THROW(graph.CreateGraph(network)); - - bool fused = true; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Convolution) { - fused = false; - } - } - ASSERT_FALSE(fused); -} - -TEST_F(MKLDNNGraphOptimizationTests, DISABLED_TestNoCrashForFuseConvSumAndInput) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 5 - 5 - - - - - - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - 1 - 3 - 5 - 5 - - - 1 - 3 - 5 - 5 - - - - - 1 - 3 - 5 - 5 - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {48}, InferenceEngine::C }); - weights->allocate(); - float * data = weights->buffer(); - - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core ie; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(ie.ReadNetwork(model, 
weights_ptr)); - - MKLDNNGraphTestClass graph; - ASSERT_NO_THROW(graph.CreateGraph(network)); - - bool fused = false; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->isFusedWith(MKLDNNPlugin::Eltwise)) { - fused = true; - } - } - ASSERT_TRUE(fused); -} - -namespace GraphOptimizationUtils { - -using fake_ext_factory = std::function; - -class FakeReLUImpl : public InferenceEngine::ILayerExecImpl { -public: - FakeReLUImpl(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - InferenceEngine::StatusCode getSupportedConfigurations(std::vector& conf, InferenceEngine::ResponseDesc *resp) noexcept override { - InferenceEngine::LayerConfig config; - config.dynBatchSupport = 0; - if (cnnLayer->outData.size() != 1 && cnnLayer->insData.size() != 1) - return InferenceEngine::GENERAL_ERROR; - InferenceEngine::DataConfig cfg; - cfg.constant = false; - cfg.inPlace = 0; - InferenceEngine::SizeVector order; - for(size_t i = 0; i < cnnLayer->outData[0]->getTensorDesc().getDims().size(); i++) { - order.push_back(i); - } - cfg.desc = InferenceEngine::TensorDesc(cnnLayer->outData[0]->getTensorDesc().getPrecision(), - cnnLayer->outData[0]->getTensorDesc().getDims(), - {cnnLayer->outData[0]->getTensorDesc().getDims(), order}); - config.outConfs.push_back(cfg); - config.inConfs.push_back(cfg); - conf.push_back(config); - return InferenceEngine::OK; - } - InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config, InferenceEngine::ResponseDesc *resp) noexcept override { - if (config.dynBatchSupport) - return InferenceEngine::NOT_IMPLEMENTED; - for(auto input : config.inConfs) { - if (input.constant) - return InferenceEngine::GENERAL_ERROR; - } - for(auto output : config.outConfs) { - if (output.constant) - return InferenceEngine::GENERAL_ERROR; - } - return InferenceEngine::OK; - } - InferenceEngine::StatusCode execute(std::vector& inputs, std::vector& outputs, InferenceEngine::ResponseDesc *resp) noexcept override { - const float *src_data = inputs[0]->buffer(); - float *dst_data = outputs[0]->buffer(); - if (src_data != dst_data) - return InferenceEngine::GENERAL_ERROR; - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer* cnnLayer; -}; - -class FakeReLUFactory : public InferenceEngine::ILayerImplFactory { -public: - FakeReLUFactory(const InferenceEngine::CNNLayer *layer) { - cnnLayer = const_cast(layer); - } - // First implementation has more priority than next - InferenceEngine::StatusCode getImplementations(std::vector& impls, InferenceEngine::ResponseDesc *resp) noexcept override { - impls.push_back(InferenceEngine::ILayerImpl::Ptr(new FakeReLUImpl(cnnLayer))); - return InferenceEngine::OK; - } - -private: - InferenceEngine::CNNLayer * cnnLayer; -}; - -class FakeFabric : public InferenceEngine::Extensions::Cpu::MKLDNNExtensions { -public: - FakeFabric() { - factories["ReLU"] = [](const InferenceEngine::CNNLayer * cnnLayer) -> InferenceEngine::ILayerImplFactory* { return new FakeReLUFactory(cnnLayer); }; - } - - virtual ~FakeFabric() { - factories.clear(); - } - - void GetVersion(const InferenceEngine::Version *&versionInfo) const noexcept override {} - void Unload() noexcept override {} - InferenceEngine::StatusCode getPrimitiveTypes(char**& types, unsigned int& size, InferenceEngine::ResponseDesc* resp) noexcept override { - types = new char *[factories.size()]; - size_t count = 0; - for (auto it = factories.begin(); it != factories.end(); it++, count ++) { - types[count] = new char[it->first.size() + 1]; - 
std::copy(it->first.begin(), it->first.end(), types[count]); - types[count][it->first.size() ] = '\0'; - } - return InferenceEngine::OK; - }; - InferenceEngine::StatusCode getFactoryFor(InferenceEngine::ILayerImplFactory *&factory, - const InferenceEngine::CNNLayer *cnnLayer, - InferenceEngine::ResponseDesc *resp) noexcept override { - if (factories.find(cnnLayer->type) == factories.end()) { - std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - return InferenceEngine::NOT_FOUND; - } - factory = factories[cnnLayer->type](cnnLayer); - return InferenceEngine::OK; - } - -private: - std::map factories; -}; -} - -TEST_F(MKLDNNGraphOptimizationTests, TestNoFuseCustomActivation) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 227 - 227 - - - - - - - - 1 - 3 - 227 - 227 - - - - - 1 - 96 - 55 - 55 - - - - - - - - - 1 - 96 - 55 - 55 - - - - - 1 - 96 - 55 - 55 - - - - - - - - - -)V0G0N"; - - std::shared_ptr extension; - extension.reset(new GraphOptimizationUtils::FakeFabric()); - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager()); - extMgr->AddExtension(extension); - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {139776}, InferenceEngine::C }); - weights->allocate(); - float * data = weights->buffer(); - - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - ASSERT_NO_THROW(graph.CreateGraph(network, extMgr)); - - bool fused = true; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Convolution) { - fused = false; - } - } - ASSERT_FALSE(fused); -} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp deleted file mode 100644 index 80f894d0657..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp +++ /dev/null @@ -1,6671 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "../test_graph.hpp" -#include "mkldnn_exec_network.h" - -#include "tests_common.hpp" -#include -#include - -#include - -using namespace ::testing; -using namespace std; -using namespace mkldnn; - -class MKLDNNGraphStructureTests: public TestsCommon { -protected: - MKLDNNPlugin::NumaNodesWeights cache; -}; - -TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReorders) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 544 - 992 - - - - - - - - 1 - 3 - 544 - 992 - - - - - 1 - 16 - 272 - 496 - - - - - - - - - - 1 - 16 - 272 - 496 - - - - - 1 - 16 - 272 - 496 - - - - - - - - 1 - 16 - 272 - 496 - - - 1 - 16 - 272 - 496 - - - - - 1 - 32 - 272 - 496 - - - - - - - 1 - 32 - 272 - 496 - - - - - 1 - 32 - 272 - 496 - - - - - - - - - - 1 - 32 - 272 - 496 - - - - - 1 - 32 - 272 - 496 - - - - - - - - 1 - 32 - 272 - 496 - - - - - 1 - 32 - 136 - 248 - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {9728}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), 
weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - } - } - ASSERT_EQ(reorders_num, 3); -} - -TEST_F(MKLDNNGraphStructureTests, TestRedundantReorderBeforeConvWithC_3) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 320 - 544 - - - - - - - - 1 - 3 - 320 - 544 - - - - - 1 - 3 - 320 - 544 - - - - - - - - - 1 - 3 - 320 - 544 - - - - - 1 - 3 - 320 - 544 - - - - - - - - - - 1 - 3 - 320 - 544 - - - - - 1 - 64 - 160 - 272 - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {37936}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - if (node->getChildEdgeAt(0)->getChild()->getName() == "init_conv"){ - ASSERT_EQ(MKLDNNPlugin::Convolution, node->getChildEdgeAt(0)->getChild()->getType()); - ASSERT_EQ(InferenceEngine::Layout::NCHW, - node->getChildEdgeAt(0)->getBlob()->getTensorDesc().getLayout()); - } - } - } - size_t expected = 1; - ASSERT_EQ(reorders_num, expected); -} - -TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersBeforeConcat) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 7 - 7 - - - - - - - - 1 - 3 - 7 - 7 - - - - - 1 - 4 - 4 - 4 - - - - - - - - - - 1 - 4 - 4 - 4 - - - - - 1 - 4 - 4 - 4 - - - - - - - - 1 - 4 - 4 - 4 - - - 1 - 4 - 4 - 4 - - - - - 1 - 8 - 4 - 4 - - - - - - - 1 - 8 - 4 - 4 - - - - - 1 - 8 - 4 - 4 - - - - - - - - - 1 - 8 - 4 - 4 - - - - - 1 - 8 - 4 - 4 - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {2432}, InferenceEngine::C }); - weights->allocate(); - float * data = weights->buffer(); - - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - size_t idx = 592; // Convolution weights - size_t size = 8; // Scale and shift sizes - for (size_t i = 0; i < size; i++, idx++) { - data[idx] = 1.f; - } - for (size_t i = 0; i < size; i++, idx++) { - data[idx] = 0.f; - } - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder && node->getChildEdgeAt(0)->getChild()->getType() != MKLDNNPlugin::Output) { - reorders_num++; - } - } - ASSERT_EQ(reorders_num, 2); - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 7, 7}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = 
InferenceEngine::make_shared_blob(desc); - src->allocate(); - data = src->buffer().as(); - for (size_t i = 0; i < src->size(); i++) { - data[i] = (i % 2) ? 1 : -1; - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - std::vector refDst = {0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.040f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.119f, 0.000f, 0.000f, 1.889f, 0.000f, 0.000f, 0.000f, 1.138f, 0.647f, 0.000f, 0.348f, - 0.000f, 1.711f, 1.311f, 0.000f, 0.000f, 3.045f, 1.203f, 0.000f, 0.927f, 2.041f, 0.000f, - 0.564f, 1.415f, 1.524f, 0.000f, 1.812f, 0.486f, 0.103f, 1.606f, 0.999f, 0.000f, 1.145f, - 2.158f, 0.712f, 0.000f, 0.009f, 0.756f, 0.000f, 0.000f, 0.008f, 0.243f, - - 0.381f, 0.363f, 1.846f, 0.804f, 1.372f, 1.113f, 2.453f, 1.609f, 0.557f, 0.000f, 3.020f, - 1.422f, 0.481f, 0.221f, 1.137f, 0.401f, 1.475f, 0.301f, 0.862f, 2.052f, 2.680f, 0.284f, - 0.000f, 2.389f, 0.917f, 0.000f, 0.358f, 1.989f, 0.355f, 0.000f, 0.000f, 0.570f, 0.000f, - 0.761f, 0.000f, 0.000f, 0.652f, 0.910f, 0.000f, 0.000f, 0.226f, 0.000f, 0.000f, 0.323f, - 0.000f, 0.000f, 0.000f, 0.108f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.866f, 0.000f, - 0.000f, 0.000f, 0.759f, 0.000f, 0.000f, 0.029f, 1.186f, 0.000f, 0.000f}; - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); - - // Compare for batch2 - network.setBatchSize(2); - graph.CreateGraph(network); - desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 3, 7, 7}, InferenceEngine::NCHW); - - InferenceEngine::Blob::Ptr srcBatch = InferenceEngine::make_shared_blob(desc); - srcBatch->allocate(); - data = srcBatch->buffer().as(); - float *originData = src->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < src->size(); i++) { - data[srcBatch->getTensorDesc().offset(b*src->size() + i)] = originData[src->getTensorDesc().offset(i)]; - } - } - - srcs.clear(); - srcs.insert(std::pair("data", srcBatch)); - out = network.getOutputsInfo(); - - outputBlobs.clear(); - item = *out.begin(); - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - dstOut->allocate(); - data = dstOut->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < refDst.size(); i++) { - data[dstOut->getTensorDesc().offset(b*refDst.size() + i)] = refDst[i]; - } - } - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersBeforeDWConvolution) { - std::string model = R"V0G0N( - - - - - - 2 - 3 - 5 - 5 - - - - - - - - 2 - 3 - 5 - 5 - - - - - 2 - 4 - 5 - 5 - - - - - - - - - - 2 - 4 - 5 - 5 - - - - - 2 - 4 - 5 - 5 - - - - - - - - 2 - 4 - 5 - 5 - - - - - 2 - 4 - 5 - 5 - - - - - - - - - - 2 - 4 - 5 - 5 - - - - - 2 - 4 - 5 - 5 - - - - - - - - 2 - 4 - 5 - 5 - - - 2 - 4 - 5 - 5 - - - - - 2 - 8 - 5 - 5 - - - - - - - 2 - 8 - 5 - 5 - - - - - 2 - 8 - 5 - 5 - - - - - - - - - - 2 - 8 - 5 - 5 - 
- - - - 2 - 8 - 5 - 5 - - - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {288}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - } - } - size_t expected = InferenceEngine::with_cpu_x86_avx2() ? 2 : 3; - ASSERT_EQ(reorders_num, expected); - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {2, 3, 5, 5}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(desc); - src->allocate(); - auto *data = src->buffer().as(); - size_t sizeB1 = src->size() / 2; - fill_data(data, sizeB1); - for (size_t i = 0; i < sizeB1; i++) { - data[sizeB1 + i] = data[i]; - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - std::vector refDst = {0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, - 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, - 0.920f, 0.920f, 0.920f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, - 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, - 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.185f, 0.176f, 0.000f, 0.000f, 0.000f, 0.215f, 0.000f, 0.957f, 1.092f, 0.000f, - 0.000f, 0.213f, 0.020f, 1.391f, 2.359f, 0.583f, 0.000f, 0.000f, 0.138f, 0.043f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.720f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.069f, 0.188f, 0.046f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.045f, - 0.041f, 0.000f, 0.000f, 0.056f, 0.000f, 0.000f, 0.086f, 0.025f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.012f, 0.056f, 0.000f, 0.060f, 0.055f, 0.000f, 0.000f, 0.037f, 0.000f, 0.000f, - 0.000f, 0.000f, - - 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, - 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, 0.920f, - 0.920f, 0.920f, 0.920f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 
0.827f, 0.827f, 0.827f, - 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, - 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.827f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.185f, 0.176f, 0.000f, 0.000f, 0.000f, 0.215f, 0.000f, 0.957f, 1.092f, 0.000f, - 0.000f, 0.213f, 0.020f, 1.391f, 2.359f, 0.583f, 0.000f, 0.000f, 0.138f, 0.043f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.720f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.069f, 0.188f, 0.046f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.045f, - 0.041f, 0.000f, 0.000f, 0.056f, 0.000f, 0.000f, 0.086f, 0.025f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.012f, 0.056f, 0.000f, 0.060f, 0.055f, 0.000f, 0.000f, 0.037f, 0.000f, 0.000f, - 0.000f, 0.000f}; - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); -} - -// TODO change hardcoded reference to dynamically generated -TEST_F(MKLDNNGraphStructureTests, DISABLED_TestNoRedundantReordersBeforeDWDeconvolution) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 12 - 2 - 2 - - - - - - - - - 1 - 12 - 2 - 2 - - - - - 1 - 12 - 4 - 4 - - - - - - - - - - 1 - 12 - 2 - 2 - - - - - 1 - 24 - 1 - 1 - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {5664}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType()); - } - } - ASSERT_EQ(reorders_num, 2); - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(desc); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("input", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - InferenceEngine::DataPtr item = out["deconv1"]; - InferenceEngine::TBlob::Ptr output1; - output1 = InferenceEngine::make_shared_blob(item->getTensorDesc()); - output1->allocate(); - outputBlobs["deconv1"] = output1; - - item = out["deconv2"]; - InferenceEngine::TBlob::Ptr output2; - output2 = InferenceEngine::make_shared_blob(item->getTensorDesc()); - 
output2->allocate(); - outputBlobs["deconv2"] = output2; - - graph.Infer(srcs, outputBlobs); - - std::vector refDst1 = {-0.042f, -0.563f, -0.150f, 0.396f, 0.224f, 0.229f, -0.335f, -0.390f, -0.213f, 0.959f, 0.520f, -0.507f, - -0.200f, -0.202f, 0.441f, 0.499f, 0.000f, 0.000f, 0.000f, 0.000f, 0.363f, 0.141f, -0.497f, -0.332f, -0.311f, - 0.423f, 0.693f, -0.012f, -0.328f, -0.106f, 0.518f, 0.353f, 0.000f, 0.000f, 0.000f, 0.000f, 0.050f, -0.352f, - -0.045f, 0.000f, -0.303f, 0.605f, 0.754f, -0.143f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.012f, 0.298f, 0.000f, - -0.066f, -0.303f, -0.318f, -0.054f, 0.322f, 0.002f, 0.050f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.328f, -0.162f, -0.765f, -0.221f, 0.422f, 0.715f, 0.726f, 0.375f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, -0.744f, -0.038f, -0.109f, 0.000f, 0.583f, 0.892f, - 0.039f, -0.356f, 0.000f, 0.000f, 0.000f, 0.000f, -0.514f, 0.320f, 0.193f, 0.000f, -0.785f, -0.508f, 0.160f, -0.104f, - 0.473f, 0.214f, 0.129f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, -0.299f, 0.784f, 0.953f, -0.163f, -1.160f, -0.547f, - 0.401f, -0.066f, 0.275f, -0.172f, -0.683f, -0.188f, 0.384f, -0.149f, 0.151f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, - 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f, 0.000f}; - InferenceEngine::TBlob::Ptr dstOut1 = InferenceEngine::make_shared_blob(out["deconv1"]->getTensorDesc(), refDst1.data()); - - std::vector refDst2 = {-0.814f, -0.337f, -1.081f, 1.139f, -0.197f, 1.547f, -0.778f, -2.467f, 1.409f, -1.472f, 2.827f, 0.663f, - -0.645f, 0.105f, -1.873f, -0.272f, 1.071f, 2.706f, -1.705f, 0.602f, -1.956f, 0.734f, 2.325f, -2.147f}; - InferenceEngine::TBlob::Ptr dstOut2 = InferenceEngine::make_shared_blob(out["deconv2"]->getTensorDesc(), refDst2.data()); - - compare(*output1, *dstOut1); - compare(*output2, *dstOut2); -} - -TEST_F(MKLDNNGraphStructureTests, TestSeveralOutputToNextLayer) { - std::string model = R"V0G0N( - - - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 1 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - - - - - - 1 - 1 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - } - } - ASSERT_EQ(reorders_num, 3); - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(desc); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = 
InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - compare(*output, *src); -} - - -TEST_F(MKLDNNGraphStructureTests, TestOutputAfterInplacePlusConcat) { - std::string model = R"V0G0N( - - - - - - - 1 - 3 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 1 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - - - - - - 1 - 1 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - - - 1 - 3 - 2 - 2 - - - - - - - - - 1 - 3 - 2 - 2 - - - - - 1 - 12 - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache)); - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo(); - execNetwork->setNetworkInputs(_networkInputs); - execNetwork->setNetworkOutputs(_networkOutputs); - InferenceEngine::IInferRequestInternal::Ptr inferRequest = execNetwork->CreateInferRequest(); - - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(desc); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::ResponseDesc resp; - - ASSERT_NO_THROW(inferRequest->SetBlob("data", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - - ASSERT_NO_THROW(inferRequest->SetBlob(item.first, output)); - ASSERT_NO_THROW(inferRequest->Infer()); - - compare(*output, *src); -} - -TEST_F(MKLDNNGraphStructureTests, TestResnetPart) { - std::string modelB = R"V0G0N( - - - - - - 1 - 3 - 224 - 224 - - - - - - - - 1 - 3 - 224 - 224 - - - - - 1 - 64 - 112 - 112 - - - - - - - - - 1 - 64 - 112 - 112 - - - - - 1 - 64 - 112 - 112 - - - - - - - - 1 - 64 - 112 - 112 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 256 - 56 - 56 - - - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 256 - 56 - 56 - - - - - - - - - - 1 - 256 - 56 - 56 - - - 1 - 256 - 56 - 56 - - - - - 1 - 256 - 56 - 56 - - - - - - - 1 - 256 - 56 - 56 - - - - - 1 - 256 - 56 - 56 - - - - - - - - 1 - 256 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - )V0G0N"; - std::string modelE =R"V0G0N( - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 256 - 56 - 56 - - - - - - - - - - 1 - 256 - 56 - 56 - - - 1 - 256 - 56 - 56 - - - - - 1 - 256 - 56 - 56 - - - - - - - 1 - 256 - 56 - 56 - - - - - 1 - 256 - 56 - 56 - - - - - - - - 1 - 256 - 56 - 56 - - - - - 1 - 256 - 1 - 1 - - - - - - - - 1 - 256 - 1 - 1 - - - - - 1 - 1000 - - - - - - - - - 1 - 1000 - - - - - 1 - 1000 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -)V0G0N"; - - std::string model = modelB + modelE; - - 
InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {1643424}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache)); - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo(); - execNetwork->setNetworkInputs(_networkInputs); - execNetwork->setNetworkOutputs(_networkOutputs); - InferenceEngine::IInferRequestInternal::Ptr inferRequest = execNetwork->CreateInferRequest(); - - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 224, 224}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(desc); - src->allocate(); - fill_data(src->buffer(), src->size()); - - InferenceEngine::ResponseDesc resp; - - ASSERT_NO_THROW(inferRequest->SetBlob("input", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - - ASSERT_NO_THROW(inferRequest->SetBlob(item.first.c_str(), output)); - - ASSERT_NO_THROW(inferRequest->Infer()); -} - -TEST_F(MKLDNNGraphStructureTests, TestConcatAfterConcat) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 20 - 20 - - - - - - - 1 - 4 - 20 - 20 - - - - - - - 1 - 2 - 20 - 20 - - - - - - - - 1 - 3 - 20 - 20 - - - 1 - 2 - 20 - 20 - - - - - 1 - 5 - 20 - 20 - - - - - - - - 1 - 4 - 20 - 20 - - - 1 - 5 - 20 - 20 - - - - - 1 - 9 - 20 - 20 - - - - - - - - 1 - 9 - 20 - 20 - - - - - 1 - 9 - 1 - 1 - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache)); - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo(); - execNetwork->setNetworkInputs(_networkInputs); - execNetwork->setNetworkOutputs(_networkOutputs); - InferenceEngine::IInferRequestInternal::Ptr inferRequest = execNetwork->CreateInferRequest(); - - InferenceEngine::TensorDesc desc1(InferenceEngine::Precision::FP32, {1, 3, 20, 20}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob(desc1); - src1->allocate(); - fill_data(src1->buffer(), src1->size()); - - InferenceEngine::TensorDesc desc2(InferenceEngine::Precision::FP32, {1, 4, 20, 20}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob(desc2); - src2->allocate(); - fill_data(src2->buffer(), src2->size()); - - InferenceEngine::TensorDesc desc3(InferenceEngine::Precision::FP32, {1, 2, 20, 20}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob(desc3); - src3->allocate(); - fill_data(src3->buffer(), src3->size()); - - InferenceEngine::ResponseDesc resp; - - ASSERT_NO_THROW(inferRequest->SetBlob("data1", src1)); - 
ASSERT_NO_THROW(inferRequest->SetBlob("data2", src2)); - ASSERT_NO_THROW(inferRequest->SetBlob("data3", src3)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - - ASSERT_NO_THROW(inferRequest->SetBlob(item.first, output)); - - ASSERT_NO_THROW(inferRequest->Infer()); - -// compare(*output, *src); -} - -TEST_F(MKLDNNGraphStructureTests, Test2ConcatFromConcat) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 2 - 2 - - - - - - - 1 - 4 - 2 - 2 - - - - - - - 1 - 2 - 2 - 2 - - - - - - - 1 - 1 - 2 - 2 - - - - - - - - 1 - 3 - 2 - 2 - - - 1 - 2 - 2 - 2 - - - - - 1 - 5 - 2 - 2 - - - - - - - - 1 - 5 - 2 - 2 - - - 1 - 4 - 2 - 2 - - - - - 1 - 9 - 2 - 2 - - - - - - - - 1 - 5 - 2 - 2 - - - 1 - 1 - 2 - 2 - - - - - 1 - 6 - 2 - 2 - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache)); - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo(); - execNetwork->setNetworkInputs(_networkInputs); - execNetwork->setNetworkOutputs(_networkOutputs); - InferenceEngine::IInferRequestInternal::Ptr inferRequest = execNetwork->CreateInferRequest(); - - InferenceEngine::TensorDesc desc1(InferenceEngine::Precision::FP32, {1, 3, 2, 2}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob(desc1); - src1->allocate(); - fill_data(src1->buffer(), src1->size()); - - InferenceEngine::TensorDesc desc2(InferenceEngine::Precision::FP32, {1, 4, 2, 2}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob(desc2); - src2->allocate(); - fill_data(src2->buffer(), src2->size()); - - InferenceEngine::TensorDesc desc3(InferenceEngine::Precision::FP32, {1, 2, 2, 2}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob(desc3); - src3->allocate(); - fill_data(src3->buffer(), src3->size()); - - InferenceEngine::TensorDesc desc4(InferenceEngine::Precision::FP32, {1, 1, 2, 2}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src4 = InferenceEngine::make_shared_blob(desc4); - src4->allocate(); - fill_data(src4->buffer(), src4->size()); - - InferenceEngine::ResponseDesc resp; - - ASSERT_NO_THROW(inferRequest->SetBlob("data1", src1)); - ASSERT_NO_THROW(inferRequest->SetBlob("data2", src2)); - ASSERT_NO_THROW(inferRequest->SetBlob("data3", src3)); - ASSERT_NO_THROW(inferRequest->SetBlob("data4", src4)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - std::vector::Ptr> outputs; - std::vector::Ptr> refOutputs; - for (const auto& it : out) { - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(it.second->getTensorDesc()); - output->allocate(); - outputs.push_back(output); - - InferenceEngine::TBlob::Ptr refOutput; - refOutput = InferenceEngine::make_shared_blob(it.second->getTensorDesc()); - refOutput->allocate(); - - float * refData = refOutput->buffer().as(); - size_t ref_idx = 0; - if (it.first == "Concat1") { - float *srcData = src1->buffer().as(); - for (size_t i = 0; i < src1->size(); i++, ref_idx++) { - refData[ref_idx] = srcData[i]; - } - 
srcData = src3->buffer().as(); - for (size_t i = 0; i < src3->size(); i++, ref_idx++) { - refData[ref_idx] = srcData[i]; - } - srcData = src2->buffer().as(); - for (size_t i = 0; i < src2->size(); i++, ref_idx++) { - refData[ref_idx] = srcData[i]; - } - - - } else if (it.first == "Concat2") { - float *srcData = src1->buffer().as(); - for (size_t i = 0; i < src1->size(); i++, ref_idx++) { - refData[ref_idx] = srcData[i]; - } - srcData = src3->buffer().as(); - for (size_t i = 0; i < src3->size(); i++, ref_idx++) { - refData[ref_idx] = srcData[i]; - } - srcData = src4->buffer().as(); - for (size_t i = 0; i < src4->size(); i++, ref_idx++) { - refData[ref_idx] = srcData[i]; - } - - } - refOutputs.push_back(refOutput); - - ASSERT_NO_THROW(inferRequest->SetBlob(it.first, output)); - } - - ASSERT_NO_THROW(inferRequest->Infer()); - - for (size_t i = 0; i < outputs.size(); i++) { - compare(*outputs[i], *refOutputs[i]); - } -} - -TEST_F(MKLDNNGraphStructureTests, TestResultsAfterGroupedConvWithStrides) { - std::string model = R"V0G0N( - - - - - - 1 - 24 - 80 - 80 - - - - - - - - 1 - 24 - 80 - 80 - - - - - 1 - 24 - 80 - 80 - - - - - - - - - - 1 - 24 - 80 - 80 - - - - - 1 - 24 - 80 - 80 - - - - - - - - 1 - 24 - 80 - 80 - - - 1 - 24 - 80 - 80 - - - - - 1 - 48 - 80 - 80 - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {3552}, InferenceEngine::C }); - weights->allocate(); - float * data = weights->buffer(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 24, 80, 80}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(desc); - src->allocate(); - fill_data((float *) src->buffer(), src->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr refOutput; - refOutput = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - refOutput->allocate(); - outputBlobs[item.first] = refOutput; - - graph.Infer(srcs, outputBlobs); - - // Compare for batch2 - network.setBatchSize(2); - graph.CreateGraph(network); - desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 24, 80, 80}, InferenceEngine::NCHW); - - InferenceEngine::Blob::Ptr srcBatch = InferenceEngine::make_shared_blob(desc); - srcBatch->allocate(); - data = srcBatch->buffer().as(); - float *originData = src->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < src->size(); i++) { - data[srcBatch->getTensorDesc().offset(b*src->size() + i)] = originData[src->getTensorDesc().offset(i)]; - } - } - - srcs.clear(); - srcs.insert(std::pair("data", srcBatch)); - out = network.getOutputsInfo(); - - outputBlobs.clear(); - item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - InferenceEngine::TBlob::Ptr dstOut = 
InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - dstOut->allocate(); - data = dstOut->buffer().as(); - originData = refOutput->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < refOutput->size(); i++) { - data[dstOut->getTensorDesc().offset(b*refOutput->size() + i)] = originData[refOutput->getTensorDesc().offset(i)]; - } - } - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestLoadTopologyWithConstLayer) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 20 - 20 - - - - - - - 1 - 4 - 20 - 20 - - - - - - - - - - - 1 - 3 - 20 - 20 - - - 1 - 4 - 20 - 20 - - - - - 1 - 7 - 20 - 20 - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {6400}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache)); - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo(); - execNetwork->setNetworkInputs(_networkInputs); - execNetwork->setNetworkOutputs(_networkOutputs); - InferenceEngine::IInferRequestInternal::Ptr inferRequest = execNetwork->CreateInferRequest(); - - InferenceEngine::TensorDesc desc1(InferenceEngine::Precision::FP32, {1, 3, 20, 20}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob(desc1); - src1->allocate(); - fill_data(src1->buffer(), src1->size()); - - InferenceEngine::ResponseDesc resp; - - ASSERT_NO_THROW(inferRequest->SetBlob("data", src1)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - - ASSERT_NO_THROW(inferRequest->SetBlob(item.first.c_str(), output)); - - ASSERT_NO_THROW(inferRequest->Infer()); -} - -TEST_F(MKLDNNGraphStructureTests, TestLoadTopologyWithEltwiseBeforeConcat) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 20 - 20 - - - - - - - 1 - 3 - 20 - 20 - - - - - - - - - - 1 - 1 - 20 - 20 - - - - - - - - - - - 1 - 3 - 20 - 20 - - - 1 - 3 - 20 - 20 - - - - - 1 - 3 - 20 - 20 - - - - - - - - 1 - 1 - 20 - 20 - - - 1 - 3 - 20 - 20 - - - - - 1 - 4 - 20 - 20 - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {6400}, InferenceEngine::C }); - weights->allocate(); - float * data = weights->buffer(); - for (size_t i = 0; i < 1200; i++) { - data[i] = 3; - } - for (size_t i = 1200; i < 1600; i++) { - data[i] = 4; - } - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache)); - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo(); - execNetwork->setNetworkInputs(_networkInputs); - 
execNetwork->setNetworkOutputs(_networkOutputs); - InferenceEngine::IInferRequestInternal::Ptr inferRequest = execNetwork->CreateInferRequest(); - - InferenceEngine::TensorDesc desc1(InferenceEngine::Precision::FP32, {1, 3, 20, 20}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob(desc1); - src1->allocate(); - data = src1->buffer(); - for (size_t i = 0; i < 1200; i++) { - data[i] = 1; - } - - InferenceEngine::ResponseDesc resp; - - ASSERT_NO_THROW(inferRequest->SetBlob("data", src1)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - - ASSERT_NO_THROW(inferRequest->SetBlob(item.first.c_str(), output)); - - ASSERT_NO_THROW(inferRequest->Infer()); - - auto *res_ptr = output->buffer().as(); - size_t res_size = output->size(); - - for (size_t i = 0; i < res_size; i++) { - ASSERT_NEAR(res_ptr[i], 4, 0.01f); - } - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - ASSERT_EQ(MKLDNNPlugin::Input, node->getParentEdgeAt(0)->getParent()->getType()); - ASSERT_EQ(MKLDNNPlugin::Eltwise, node->getChildEdgeAt(0)->getChild()->getType()); - } - } - ASSERT_EQ(reorders_num, 0); -} -TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersRmnet_SSSSD) { - std::string model = R"V0G0N( - - - - - - - 1 - 3 - 320 - 544 - - - - - - - 1 - 3 - 320 - 544 - - - - - 1 - 3 - 320 - 544 - - - - - - - - - - - - 1 - 3 - 320 - 544 - - - - - 1 - 32 - 160 - 272 - - - - - - - - - - - - 1 - 32 - 160 - 272 - - - - - 1 - 32 - 160 - 272 - - - - - - - - 1 - 32 - 160 - 272 - - - - - 1 - 8 - 160 - 272 - - - - - - - - - - - - 1 - 8 - 160 - 272 - - - - - 1 - 8 - 160 - 272 - - - - - - - - 1 - 8 - 160 - 272 - - - - - 1 - 8 - 160 - 272 - - - - - - - - - - - - 1 - 8 - 160 - 272 - - - - - 1 - 8 - 160 - 272 - - - - - - - - 1 - 8 - 160 - 272 - - - - - 1 - 32 - 160 - 272 - - - - - - - - - - - - 1 - 32 - 160 - 272 - - - 1 - 32 - 160 - 272 - - - - - 1 - 32 - 160 - 272 - - - - - - - - 1 - 32 - 160 - 272 - - - - - 1 - 32 - 160 - 272 - - - - - - - - 1 - 32 - 160 - 272 - - - - - 1 - 8 - 160 - 272 - - - - - - - - - - - - 1 - 8 - 160 - 272 - - - - - 1 - 8 - 160 - 272 - - - - - - - - 1 - 8 - 160 - 272 - - - - - 1 - 8 - 160 - 272 - - - - - - - - - - - - 1 - 8 - 160 - 272 - - - - - 1 - 8 - 160 - 272 - - - - - - - - 1 - 8 - 160 - 272 - - - - - 1 - 32 - 160 - 272 - - - - - - - - - - - - 1 - 32 - 160 - 272 - - - 1 - 32 - 160 - 272 - - - - - 1 - 32 - 160 - 272 - - - - - - - - 1 - 32 - 160 - 272 - - - - - 1 - 32 - 160 - 272 - - - - - - - - - - - - - - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {8664}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - 
ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType()); - } - } - - ASSERT_EQ(reorders_num, 1); -} - -TEST_F(MKLDNNGraphStructureTests, TestFailedPartDPN92) { - std::string model = R"V0G0N( - - - - - - 1 - 32 - 14 - 14 - - - - - - - 1 - 64 - 28 - 28 - - - - - - - - 1 - 64 - 28 - 28 - - - - - 1 - 96 - 14 - 14 - - - - - - - - - - - 1 - 96 - 14 - 14 - - - - - 1 - 64 - 14 - 14 - - - 1 - 32 - 14 - 14 - - - - - - - - 1 - 32 - 14 - 14 - - - - - 1 - 72 - 14 - 14 - - - - - - - - - - - 1 - 72 - 14 - 14 - - - - - 1 - 64 - 14 - 14 - - - 1 - 8 - 14 - 14 - - - - - - - - 1 - 64 - 14 - 14 - - - 1 - 64 - 14 - 14 - - - - - 1 - 64 - 14 - 14 - - - - - - - - 1 - 32 - 14 - 14 - - - 1 - 8 - 14 - 14 - - - - - 1 - 40 - 14 - 14 - - - - - - - - 1 - 64 - 14 - 14 - - - 1 - 40 - 14 - 14 - - - - - 1 - 104 - 14 - 14 - - - - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {33792}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 32, 14, 14}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob(desc); - src1->allocate(); - fill_data((float *) src1->buffer(), src1->size()); - - - desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {1, 64, 28, 28}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob(desc); - src2->allocate(); - fill_data((float *) src2->buffer(), src2->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src1)); - srcs.insert(std::pair("data2", src2)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - std::vector refDst(output->size()); - auto *data = output->buffer().as(); - for (size_t i = 0; i < output->size(); i++) { - refDst[i] = data[output->getTensorDesc().offset(i)]; - } - - // Compare for batch2 - network.setBatchSize(2); - graph.CreateGraph(network); - desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 32, 14, 14}, InferenceEngine::NCHW); - - InferenceEngine::Blob::Ptr src1Batch = InferenceEngine::make_shared_blob(desc); - src1Batch->allocate(); - data = src1Batch->buffer().as(); - auto *originData = src1->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < src1->size(); i++) { - data[src1Batch->getTensorDesc().offset(b*src1->size() + i)] = originData[src1->getTensorDesc().offset(i)]; - } - } - - desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 64, 28, 28}, InferenceEngine::NCHW); - - InferenceEngine::Blob::Ptr src2Batch = InferenceEngine::make_shared_blob(desc); - src2Batch->allocate(); - data = src2Batch->buffer().as(); - originData = src2->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < src2->size(); i++) { - 
data[src2Batch->getTensorDesc().offset(b*src2->size() + i)] = originData[src2->getTensorDesc().offset(i)]; - } - } - - srcs.clear(); - srcs.insert(std::pair("data", src1Batch)); - srcs.insert(std::pair("data2", src2Batch)); - out = network.getOutputsInfo(); - - outputBlobs.clear(); - item = *out.begin(); - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - dstOut->allocate(); - data = dstOut->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < refDst.size(); i++) { - data[dstOut->getTensorDesc().offset(b*refDst.size() + i)] = refDst[i]; - } - } - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForXceptionTopology) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 299 - 299 - - - - - - - - 1 - 3 - 299 - 299 - - - - - 1 - 32 - 149 - 149 - - - - - - - - - 1 - 32 - 149 - 149 - - - - - 1 - 32 - 149 - 149 - - - - - - - - 1 - 32 - 149 - 149 - - - - - 1 - 64 - 147 - 147 - - - - - - - - - 1 - 64 - 147 - 147 - - - - - 1 - 64 - 147 - 147 - - - - - - - - 1 - 64 - 147 - 147 - - - - - 1 - 64 - 147 - 147 - - - - - - - - - 1 - 64 - 147 - 147 - - - - - 1 - 128 - 147 - 147 - - - - - - - - - 1 - 128 - 147 - 147 - - - - - 1 - 128 - 147 - 147 - - - - - - - - 1 - 128 - 147 - 147 - - - - - 1 - 128 - 147 - 147 - - - - - - - - - 1 - 128 - 147 - 147 - - - - - 1 - 128 - 147 - 147 - - - - - - - - - - 1 - 64 - 147 - 147 - - - - - 1 - 128 - 74 - 74 - - - - - - - - - - 1 - 128 - 147 - 147 - - - - - 1 - 128 - 74 - 74 - - - - - - - 1 - 128 - 74 - 74 - - - 1 - 128 - 74 - 74 - - - - - 1 - 128 - 74 - 74 - - - - - - - 1 - 128 - 74 - 74 - - - - - 1 - 128 - 74 - 74 - - - - - - - - 1 - 128 - 74 - 74 - - - - - 1 - 128 - 74 - 74 - - - - - - - - - 1 - 128 - 74 - 74 - - - - - 1 - 256 - 74 - 74 - - - - - - - - - 1 - 256 - 74 - 74 - - - - - 1 - 256 - 74 - 74 - - - - - - - - 1 - 256 - 74 - 74 - - - - - 1 - 256 - 74 - 74 - - - - - - - - - 1 - 256 - 74 - 74 - - - - - 1 - 256 - 74 - 74 - - - - - - - - - - 1 - 128 - 74 - 74 - - - - - 1 - 256 - 37 - 37 - - - - - - - - - - 1 - 256 - 74 - 74 - - - - - 1 - 256 - 37 - 37 - - - - - - - 1 - 256 - 37 - 37 - - - 1 - 256 - 37 - 37 - - - - - 1 - 256 - 37 - 37 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {758272}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType()); - } - } - ASSERT_EQ(reorders_num, 1); -} - -TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForGrayscaleInput) { - std::string model = R"V0G0N( - - - - - - 1 - 1 - 40 - 40 - - - - - - - - 1 - 1 - 40 - 40 - - - - - 1 - 32 - 40 - 40 - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ 
InferenceEngine::Precision::U8, {1280}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t reorders_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if (node->getType() == MKLDNNPlugin::Reorder) { - reorders_num++; - ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType()); - } - } - ASSERT_EQ(reorders_num, 1); -} - -TEST_F(MKLDNNGraphStructureTests, TestFailedPartPlateRecognitionBarrier0001) { - std::string model = R"V0G0N( - - - - - - 1 - 128 - 1 - 88 - - - - - - - - 1 - 128 - 1 - 88 - - - - - 1 - 71 - 1 - 88 - - - - - - - - - - - - 1 - 71 - 1 - 88 - - - - - 1 - 71 - 1 - 88 - - - - - - - - 1 - 71 - 1 - 88 - - - - - 1 - 128 - - - - - - - - - - - - 1 - 128 - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - - - 1 - 128 - 1 - 88 - - - - - - - - 1 - 71 - 1 - 88 - - - 1 - 128 - 1 - 88 - - - - - 1 - 199 - 1 - 88 - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {3672348}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 128, 1, 88}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob(desc); - src1->allocate(); - fill_data((float *) src1->buffer(), src1->size()); - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src1)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - std::vector refDst(output->size()); - auto *data = output->buffer().as(); - for (size_t i = 0; i < output->size(); i++) { - refDst[i] = data[output->getTensorDesc().offset(i)]; - } - - // Compare for batch2 - network.setBatchSize(2); - graph.CreateGraph(network); - desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {2, 128, 1, 88}, InferenceEngine::NCHW); - - InferenceEngine::Blob::Ptr src1Batch = InferenceEngine::make_shared_blob(desc); - src1Batch->allocate(); - data = src1Batch->buffer().as(); - auto *originData = src1->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < src1->size(); i++) { - data[src1Batch->getTensorDesc().offset(b*src1->size() + i)] = originData[src1->getTensorDesc().offset(i)]; - } - } - - srcs.clear(); - srcs.insert(std::pair("data", src1Batch)); - out = network.getOutputsInfo(); - - outputBlobs.clear(); - item = *out.begin(); - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, 
outputBlobs); - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - dstOut->allocate(); - data = dstOut->buffer().as(); - for(size_t b = 0; b < 2; b++) { - for (size_t i = 0; i < refDst.size(); i++) { - data[dstOut->getTensorDesc().offset(b*refDst.size() + i)] = refDst[i]; - } - } - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestFailedVNect0001) { - std::string model = R"V0G0N( - - - - - - 1 - 256 - 23 - 23 - - - - - - - - 1 - 256 - 23 - 23 - - - - - 1 - 63 - 46 - 46 - - - - - - - - - - - 1 - 63 - 46 - 46 - - - 1 - 63 - 46 - 46 - - - - - 1 - 63 - 46 - 46 - - - - - - - - 1 - 63 - 46 - 46 - - - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, { 1032192 }, InferenceEngine::C }); - weights->allocate(); - fill_data((float *)weights->buffer(), weights->size() / sizeof(float)); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - ASSERT_NO_THROW(graph.CreateGraph(network)); -} - -TEST_F(MKLDNNGraphStructureTests, TestFailedVNect0002) { - std::string model = R"V0G0N( - - - - - - 1 - 128 - 46 - 46 - - - - - - - - 1 - 128 - 46 - 46 - - - - - 1 - 84 - 46 - 46 - - - - - - - - - - - 1 - 84 - 46 - 46 - - - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, { 43008 }, InferenceEngine::C }); - weights->allocate(); - fill_data((float *)weights->buffer(), weights->size() / sizeof(float)); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - size_t outputs_num = 0; - auto& nodes = graph.getNodes(); - for (auto &node : nodes) { - if ( node->getType() == MKLDNNPlugin::Output && - (node->getName() == "out_slice_heatmaps.0" || - node->getName() == "out_slice_heatmaps.1" || - node->getName() == "out_slice_heatmaps.2" || - node->getName() == "out_slice_heatmaps.3" ) ) { - outputs_num++; - } - } - ASSERT_EQ(outputs_num, 4); -} - - -TEST_F(MKLDNNGraphStructureTests, TestFailedVNect0003) { - std::string model = R"V0G0N( - - - - - - 1 - 128 - 46 - 46 - - - - - - - 1 - 63 - 46 - 46 - - - - - - - 1 - 21 - 46 - 46 - - - - - - - - 1 - 128 - 46 - 46 - - - - - 1 - 128 - 46 - 46 - - - - - - - - 1 - 63 - 46 - 46 - - - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - - - - - - 1 - 21 - 46 - 46 - - - - - 1 - 21 - 46 - 46 - - - - - - - - 1 - 128 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - 1 - 21 - 46 - 46 - - - - - 1 - 212 - 46 - 46 - - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); - - MKLDNNGraphTestClass graph; - ASSERT_NO_THROW(graph.CreateGraph(network)); -} - -TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) { - std::string model = R"V0G0N( - - - - - - 1 - 32 - 300 - 600 - - - - - - 
- 1 - 48 - 150 - 300 - - - - - - - - 1 - 32 - 300 - 600 - - - - - 1 - 48 - 300 - 600 - - - - - - - - - - 1 - 48 - 300 - 600 - - - - - 1 - 48 - 150 - 300 - - - - - - - - - - 1 - 48 - 150 - 300 - - - 1 - 48 - 150 - 300 - - - - - 1 - 48 - 150 - 300 - - - - - - - 1 - 48 - 150 - 300 - - - - - 1 - 48 - 150 - 300 - - - - - - - - 1 - 48 - 150 - 300 - - - - - 1 - 48 - 150 - 300 - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {8064}, InferenceEngine::C }); - weights->allocate(); - float * data = weights->buffer(); - memset((float *) weights->buffer(), 0, weights->size()); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - network = core.ReadNetwork(model, weights_ptr); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::TensorDesc src0_desc(InferenceEngine::Precision::FP32, {1, 32, 300, 600}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src0 = InferenceEngine::make_shared_blob(src0_desc); - src0->allocate(); - data = src0->buffer().as(); - for (size_t i = 0; i < src0->size(); i++) { - data[i] = 0; - } - - InferenceEngine::TensorDesc src1_desc(InferenceEngine::Precision::FP32, {1, 48, 150, 300}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob(src1_desc); - src1->allocate(); - data = src1->buffer().as(); - for (size_t i = 0; i < src1->size(); i++) { - data[i] = i % 10; - } - - std::vector refDst(src1->size()); - for (size_t i = 0; i < refDst.size(); i++) { - refDst[i] = -1 * data[i]; - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data0", src0)); - srcs.insert(std::pair("data1", src1)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestConstantLayerAsOutput) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 10 - 10 - - - - - - - 1 - 3 - 10 - 10 - - - - - 1 - 3 - 10 - 10 - - - - - - - - - - - - 1 - 3 - 10 - 10 - - - - - 1 - 64 - 5 - 5 - - - - - - - - - - - - 1 - 64 - 5 - 5 - - - 1 - 3 - 10 - 10 - - - - - 1 - 2 - 600 - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {37912}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 10, 10}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(desc); - src->allocate(); - auto *data = src->buffer().as(); - size_t sizeB1 = src->size() / 2; - fill_data(data, sizeB1); - for (size_t i = 0; i < sizeB1; i++) { - data[sizeB1 + i] = data[i]; - } - 
- InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - std::vector refDst = {-3.603f,-4.313f,6.803f,7.513f,-4.918f,-3.661f,8.118f,6.861f,-5.243f,-5.458f,8.443f,8.658f,-7.395f,-4.832f,10.595f,8.032f, - -7.459f,-7.113f,10.659f,10.313f,-10.814f,-7.249f,14.014f,10.449f,-0.403f,-4.313f,10.003f,7.513f,-1.718f,-3.661f,11.318f,6.861f, - -2.043f,-5.458f,11.643f,8.658f,-4.195f,-4.832f,13.795f,8.032f,-4.259f,-7.113f,13.859f,10.313f,-7.614f,-7.249f,17.214f,10.449f, - 2.797f,-4.313f,13.203f,7.513f,1.482f,-3.661f,14.518f,6.861f,1.157f,-5.458f,14.843f,8.658f,-0.995f,-4.832f,16.995f,8.032f, - -1.059f,-7.113f,17.059f,10.313f,-4.414f,-7.249f,20.414f,10.449f,5.997f,-4.313f,16.403f,7.513f,4.682f,-3.661f,17.718f,6.861f, - 4.357f,-5.458f,18.043f,8.658f,2.205f,-4.832f,20.195f,8.032f,2.141f,-7.113f,20.259f,10.313f,-1.214f,-7.249f,23.614f,10.449f, - 9.197f,-4.313f,19.603f,7.513f,7.882f,-3.661f,20.918f,6.861f,7.557f,-5.458f,21.243f,8.658f,5.405f,-4.832f,23.395f,8.032f,5.341f, - -7.113f,23.459f,10.313f,1.986f,-7.249f,26.814f,10.449f,-3.603f,-1.113f,6.803f,10.713f,-4.918f,-0.461f,8.118f,10.061f,-5.243f,-2.258f, - 8.443f,11.858f,-7.395f,-1.632f,10.595f,11.232f,-7.459f,-3.913f,10.659f,13.513f,-10.814f,-4.049f,14.014f,13.649f,-0.403f,-1.113f, - 10.003f,10.713f,-1.718f,-0.461f,11.318f,10.061f,-2.043f,-2.258f,11.643f,11.858f,-4.195f,-1.632f,13.795f,11.232f,-4.259f,-3.913f, - 13.859f,13.513f,-7.614f,-4.049f,17.214f,13.649f,2.797f,-1.113f,13.203f,10.713f,1.482f,-0.461f,14.518f,10.061f,1.157f,-2.258f,14.843f, - 11.858f,-0.995f,-1.632f,16.995f,11.232f,-1.059f,-3.913f,17.059f,13.513f,-4.414f,-4.049f,20.414f,13.649f,5.997f,-1.113f,16.403f,10.713f, - 4.682f,-0.461f,17.718f,10.061f,4.357f,-2.258f,18.043f,11.858f,2.205f,-1.632f,20.195f,11.232f,2.141f,-3.913f,20.259f,13.513f,-1.214f, - -4.049f,23.614f,13.649f,9.197f,-1.113f,19.603f,10.713f,7.882f,-0.461f,20.918f,10.061f,7.557f,-2.258f,21.243f,11.858f,5.405f,-1.632f, - 23.395f,11.232f,5.341f,-3.913f,23.459f,13.513f,1.986f,-4.049f,26.814f,13.649f,-3.603f,2.087f,6.803f,13.913f,-4.918f,2.739f,8.118f, - 13.261f,-5.243f,0.942f,8.443f,15.058f,-7.395f,1.568f,10.595f,14.432f,-7.459f,-0.713f,10.659f,16.713f,-10.814f,-0.849f,14.014f,16.849f, - -0.403f,2.087f,10.003f,13.913f,-1.718f,2.739f,11.318f,13.261f,-2.043f,0.942f,11.643f,15.058f,-4.195f,1.568f,13.795f,14.432f,-4.259f, - -0.713f,13.859f,16.713f,-7.614f,-0.849f,17.214f,16.849f,2.797f,2.087f,13.203f,13.913f,1.482f,2.739f,14.518f,13.261f,1.157f,0.942f,14.843f, - 15.058f,-0.995f,1.568f,16.995f,14.432f,-1.059f,-0.713f,17.059f,16.713f,-4.414f,-0.849f,20.414f,16.849f,5.997f,2.087f,16.403f,13.913f, - 4.682f,2.739f,17.718f,13.261f,4.357f,0.942f,18.043f,15.058f,2.205f,1.568f,20.195f,14.432f,2.141f,-0.713f,20.259f,16.713f,-1.214f,-0.849f, - 23.614f,16.849f,9.197f,2.087f,19.603f,13.913f,7.882f,2.739f,20.918f,13.261f,7.557f,0.942f,21.243f,15.058f,5.405f,1.568f,23.395f,14.432f, - 5.341f,-0.713f,23.459f,16.713f,1.986f,-0.849f,26.814f,16.849f,-3.603f,5.287f,6.803f,17.113f,-4.918f,5.939f,8.118f,16.461f,-5.243f,4.142f, - 8.443f,18.258f,-7.395f,4.768f,10.595f,17.632f,-7.459f,2.487f,10.659f,19.913f,-10.814f,2.351f,14.014f,20.049f,-0.403f,5.287f,10.003f, - 
17.113f,-1.718f,5.939f,11.318f,16.461f,-2.043f,4.142f,11.643f,18.258f,-4.195f,4.768f,13.795f,17.632f,-4.259f,2.487f,13.859f,19.913f, - -7.614f,2.351f,17.214f,20.049f,2.797f,5.287f,13.203f,17.113f,1.482f,5.939f,14.518f,16.461f,1.157f,4.142f,14.843f,18.258f,-0.995f,4.768f, - 16.995f,17.632f,-1.059f,2.487f,17.059f,19.913f,-4.414f,2.351f,20.414f,20.049f,5.997f,5.287f,16.403f,17.113f,4.682f,5.939f,17.718f,16.461f, - 4.357f,4.142f,18.043f,18.258f,2.205f,4.768f,20.195f,17.632f,2.141f,2.487f,20.259f,19.913f,-1.214f,2.351f,23.614f,20.049f,9.197f,5.287f, - 19.603f,17.113f,7.882f,5.939f,20.918f,16.461f,7.557f,4.142f,21.243f,18.258f,5.405f,4.768f,23.395f,17.632f,5.341f,2.487f,23.459f,19.913f, - 1.986f,2.351f,26.814f,20.049f,-3.603f,8.487f,6.803f,20.313f,-4.918f,9.139f,8.118f,19.661f,-5.243f,7.342f,8.443f,21.458f,-7.395f,7.968f, - 10.595f,20.832f,-7.459f,5.687f,10.659f,23.113f,-10.814f,5.551f,14.014f,23.249f,-0.403f,8.487f,10.003f,20.313f,-1.718f,9.139f,11.318f, - 19.661f,-2.043f,7.342f,11.643f,21.458f,-4.195f,7.968f,13.795f,20.832f,-4.259f,5.687f,13.859f,23.113f,-7.614f,5.551f,17.214f,23.249f,2.797f, - 8.487f,13.203f,20.313f,1.482f,9.139f,14.518f,19.661f,1.157f,7.342f,14.843f,21.458f,-0.995f,7.968f,16.995f,20.832f,-1.059f,5.687f,17.059f, - 23.113f,-4.414f,5.551f,20.414f,23.249f,5.997f,8.487f,16.403f,20.313f,4.682f,9.139f,17.718f,19.661f,4.357f,7.342f,18.043f,21.458f,2.205f, - 7.968f,20.195f,20.832f,2.141f,5.687f,20.259f,23.113f,-1.214f,5.551f,23.614f,23.249f,9.197f,8.487f,19.603f,20.313f,7.882f,9.139f,20.918f, - 19.661f,7.557f,7.342f,21.243f,21.458f,5.405f,7.968f,23.395f,20.832f,5.341f,5.687f,23.459f,23.113f,1.986f,5.551f,26.814f,23.249f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 
0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f, - 0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f,0.100f,0.100f,0.200f,0.200f}; - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestGemmConvolutionWithConcat) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 227 - 227 - - - - - - - - 1 - 3 - 227 - 227 - - - - - 1 - 64 - 113 - 113 - - - - - - - - - - - - 1 - 64 - 113 - 113 - - - - - 1 - 64 - 113 - 113 - - - - - - - - 1 - 64 - 113 - 113 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 16 - 56 - 56 - - - - - - - - - - - - 1 - 16 - 56 - 56 - - - - - 1 - 16 - 56 - 56 - - - - - - - - 1 - 16 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 16 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 64 - 56 - 56 - - - 1 - 64 - 56 - 56 - - - - - 1 - 128 - 56 - 56 - - - - - - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {52800}, InferenceEngine::C }); - 
weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - auto graphInfer = [](InferenceEngine::CNNNetwork network, InferenceEngine::BlobMap& inBlobs, - InferenceEngine::BlobMap& outBlobs, std::string primitivesPriority) { - for (auto it = InferenceEngine::details::CNNNetworkIterator(network); !primitivesPriority.empty() && - it != InferenceEngine::details::CNNNetworkIterator(); it++) { - (*it)->params["PrimitivesPriority"] = primitivesPriority; - } - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - graph.Infer(inBlobs, outBlobs); - }; - - InferenceEngine::InputsDataMap inputsMap = network.getInputsInfo(); - InferenceEngine::BlobMap inputBlobs; - - for (const auto& input : inputsMap) { - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(input.second->getTensorDesc()); - src->allocate(); - fill_data((float *) src->buffer(), src->size()); - inputBlobs[input.first] = src; - } - - InferenceEngine::OutputsDataMap outsMap = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs1; - InferenceEngine::BlobMap outputBlobs2; - for (const auto& output : outsMap) { - InferenceEngine::TBlob::Ptr dst1, dst2; - dst1 = InferenceEngine::make_shared_blob(output.second->getTensorDesc()); - dst1->allocate(); - outputBlobs1[output.first] = dst1; - dst2 = InferenceEngine::make_shared_blob(output.second->getTensorDesc()); - dst2->allocate(); - outputBlobs2[output.first] = dst2; - } - - graphInfer(network, inputBlobs, outputBlobs1, ""); - graphInfer(network, inputBlobs, outputBlobs2, "cpu:gemm_blas"); - compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second); - - graphInfer(network, inputBlobs, outputBlobs2, "cpu:gemm_avx512"); - compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second); - - graphInfer(network, inputBlobs, outputBlobs2, "cpu:gemm_avx2"); - compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second); - - graphInfer(network, inputBlobs, outputBlobs2, "cpu:gemm_sse42"); - compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second); - - graphInfer(network, inputBlobs, outputBlobs2, "cpu:gemm_any"); - compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second); -} - - -TEST_F(MKLDNNGraphStructureTests, TestRefPoolingWithConcat) { - std::string model = R"V0G0N( - - - - - - 1 - 3 - 227 - 227 - - - - - - - - 1 - 3 - 227 - 227 - - - - - 1 - 64 - 113 - 113 - - - - - - - - - - - - 1 - 64 - 113 - 113 - - - - - 1 - 64 - 113 - 113 - - - - - - - - 1 - 64 - 113 - 113 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 16 - 56 - 56 - - - - - - - - - - - - 1 - 16 - 56 - 56 - - - - - 1 - 16 - 56 - 56 - - - - - - - - 1 - 16 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - - - - - 1 - 64 - 56 - 56 - - - - - 1 - 64 - 56 - 56 - - - - - - - - 1 - 16 - 56 - 56 - - - - - 1 - 16 - 56 - 56 - - - - - - - - - - - - 1 - 64 - 56 - 56 - - - 1 - 16 - 56 - 56 - - - - - 1 - 80 - 56 - 56 - - - - - - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {52800}, InferenceEngine::C }); - weights->allocate(); - fill_data((float *) weights->buffer(), weights->size() / sizeof(float)); - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - 
InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - auto graphInfer = [](InferenceEngine::CNNNetwork network, InferenceEngine::BlobMap& inBlobs, - InferenceEngine::BlobMap& outBlobs, std::string primitivesPriority) { - for (auto it = InferenceEngine::details::CNNNetworkIterator(network); !primitivesPriority.empty() && - it != InferenceEngine::details::CNNNetworkIterator(); it++) { - (*it)->params["PrimitivesPriority"] = primitivesPriority; - } - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - graph.Infer(inBlobs, outBlobs); - }; - - InferenceEngine::InputsDataMap inputsMap = network.getInputsInfo(); - InferenceEngine::BlobMap inputBlobs; - - for (const auto& input : inputsMap) { - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(input.second->getTensorDesc()); - src->allocate(); - fill_data((float *) src->buffer(), src->size()); - inputBlobs[input.first] = src; - } - - InferenceEngine::OutputsDataMap outsMap = network.getOutputsInfo(); - InferenceEngine::BlobMap outputBlobs1; - InferenceEngine::BlobMap outputBlobs2; - for (const auto& output : outsMap) { - InferenceEngine::TBlob::Ptr dst1, dst2; - dst1 = InferenceEngine::make_shared_blob(output.second->getTensorDesc()); - dst1->allocate(); - outputBlobs1[output.first] = dst1; - dst2 = InferenceEngine::make_shared_blob(output.second->getTensorDesc()); - dst2->allocate(); - outputBlobs2[output.first] = dst2; - } - - graphInfer(network, inputBlobs, outputBlobs1, ""); - graphInfer(network, inputBlobs, outputBlobs2, "cpu:gemm_blas,cpu:ref_any"); - compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second); - - graphInfer(network, inputBlobs, outputBlobs2, "cpu:ref_any"); - compare(*outputBlobs1.begin()->second, *outputBlobs2.begin()->second); -} - -TEST_F(MKLDNNGraphStructureTests, TestConvolutionWith2DepthwiseOpFusing) { - std::string model = R"V0G0N( - - - - - - 1 - 32 - 300 - 600 - - - - - - - - 1 - 32 - 300 - 600 - - - - - 1 - 48 - 300 - 600 - - - - - - - - - - 1 - 48 - 300 - 600 - - - - - 1 - 48 - 300 - 600 - - - - - - - - 1 - 48 - 300 - 600 - - - - - 1 - 48 - 300 - 600 - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {6724}, InferenceEngine::C }); - weights->allocate(); - float* wdata = weights->buffer(); - - for (int i = 0; i < weights->size() / sizeof(float); i++) - wdata[i] = 1; - wdata[1584] = 2; // 2 for prelu weights - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - const auto& nodes = graph.getNodes(); - ASSERT_EQ(nodes.size(), 5); - ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input); - ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution); - ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise)); - ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output); - - InferenceEngine::TensorDesc src_desc(InferenceEngine::Precision::FP32, {1, 32, 300, 600}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(src_desc); - src->allocate(); - float* sdata = 
src->buffer().as(); - for (size_t i = 0; i < src->size(); i++) { - sdata[i] = -1; - } - - std::vector refDst(1 * 48 * 300 * 600); - for (size_t i = 0; i < refDst.size(); i++) { - refDst[i] = -61; // (-32 + 1) * 2 * 1 + 1 - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestConvolutionWith2EltwiseOpFusing) { - std::string model = R"V0G0N( - - - - - - 1 - 1 - 300 - 600 - - - - - - - - 1 - 1 - 300 - 600 - - - - - 1 - 48 - 300 - 600 - - - - - - - - - 1 - 48 - 300 - 600 - - - - - 1 - 48 - 300 - 600 - - - - - - - - 1 - 48 - 300 - 600 - - - - - 1 - 48 - 300 - 600 - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {384}, InferenceEngine::C }); - weights->allocate(); - float* wdata = weights->buffer(); - - for (int i = 0; i < weights->size() / sizeof(float); i++) - wdata[i] = 1; - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - const auto& nodes = graph.getNodes(); - ASSERT_EQ(nodes.size(), 4); - ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input); - ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution); - ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise)); - ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Output); - - InferenceEngine::TensorDesc src_desc(InferenceEngine::Precision::FP32, {1, 1, 300, 600}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(src_desc); - src->allocate(); - float* sdata = src->buffer().as(); - for (size_t i = 0; i < src->size(); i++) { - sdata[i] = i % 2 == 0 ? 2 : -2; - } - - std::vector refDst(1 * 48 * 300 * 600); - for (size_t i = 0; i < refDst.size(); i++) { - refDst[i] = i % 2 == 0 ? 
0.952574127f : 0.3f; - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestGemmConvolutionWith2DepthwiseOpFusing) { - std::string model = R"V0G0N( - - - - - - 1 - 8 - 300 - 600 - - - - - - - - 1 - 8 - 300 - 600 - - - - - 1 - 8 - 300 - 600 - - - - - - - - - - 1 - 8 - 300 - 600 - - - - - 1 - 8 - 300 - 600 - - - - - - - - 1 - 8 - 300 - 600 - - - - - 1 - 8 - 300 - 600 - - - - - - - - - - - - -)V0G0N"; - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, {228}, InferenceEngine::C }); - weights->allocate(); - float* wdata = weights->buffer(); - - for (int i = 0; i < weights->size() / sizeof(float); i++) - wdata[i] = 1; - wdata[40] = 2; // 2 for prelu weights - - InferenceEngine::TBlob::Ptr weights_ptr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - const auto& nodes = graph.getNodes(); - ASSERT_EQ(nodes.size(), 3); - ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input); - ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution); - ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise)); - ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output); - - InferenceEngine::TensorDesc src_desc(InferenceEngine::Precision::FP32, {1, 8, 300, 600}, InferenceEngine::NCHW); - InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob(src_desc); - src->allocate(); - float* sdata = src->buffer().as(); - for (size_t i = 0; i < src->size(); i++) { - sdata[i] = -1; - } - - std::vector refDst(1 * 8 * 300 * 600); - for (size_t i = 0; i < refDst.size(); i++) { - refDst[i] = -5; // (-4 + 1) * 2 * 1 + 1 - } - - InferenceEngine::BlobMap srcs; - srcs.insert(std::pair("data", src)); - - InferenceEngine::OutputsDataMap out = network.getOutputsInfo(); - - InferenceEngine::BlobMap outputBlobs; - std::pair item = *out.begin(); - - InferenceEngine::TBlob::Ptr output; - output = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - output->allocate(); - outputBlobs[item.first] = output; - - graph.Infer(srcs, outputBlobs); - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(item.second->getTensorDesc(), refDst.data()); - - compare(*output, *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithSplit) { - std::string model = R"V0G0N( - - - - - - 1 - 2 - 8 - 8 - - - - - - - - 1 - 2 - 8 - 8 - - - - - 1 - 1 - 8 - 8 - - - 1 - 1 - 8 - 8 - - - - - - - - -)V0G0N"; - - const size_t batchHeight = 8; - const size_t batchWidth = 8; - const InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::FP32, { 1, 2, batchHeight, batchWidth }, InferenceEngine::NCHW); - const size_t batchSize = batchHeight * batchWidth; - const float channel1Value = 1.0; - const float channel2Value = 2.0; - - InferenceEngine::Blob::Ptr inputBlob = 
InferenceEngine::make_shared_blob(tensorDesc); - inputBlob->allocate(); - float* inputData = inputBlob->buffer().as(); - for (size_t i = 0; i < inputBlob->size(); i++) { - inputData[i] = (i < batchSize) ? channel1Value : channel2Value; - } - - InferenceEngine::TBlob* weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, { 228 }, InferenceEngine::C }); - weights->allocate(); - float* weightsData = weights->buffer(); - for (size_t i = 0ULL; i < weights->size() / sizeof(float); i++) { - weightsData[i] = 1.0; - } - - const InferenceEngine::TBlob::Ptr weightsPtr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weightsPtr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - const auto& nodes = graph.getNodes(); - ASSERT_EQ(nodes.size(), 5); - ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input); - ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Split); - ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Output); - ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output); - - InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo(); - const std::pair splitOutputItem1 {"Split.0", outputs["Split.0"]}; - const std::pair splitOutputItem2 {"Split.1", outputs["Split.1"]}; - - std::vector splitExpectedOutputData1(batchSize); - std::vector splitExpectedOutputData2(batchSize); - for (size_t i = 0; i < splitExpectedOutputData1.size(); i++) { - splitExpectedOutputData1[i] = 1.0; - splitExpectedOutputData2[i] = 2.0; - } - const InferenceEngine::TBlob::Ptr splitExpectedOutputBlob1 = InferenceEngine::make_shared_blob( - splitOutputItem1.second->getTensorDesc(), - splitExpectedOutputData1.data()); - const InferenceEngine::TBlob::Ptr splitExpectedOutputBlob2 = InferenceEngine::make_shared_blob( - splitOutputItem2.second->getTensorDesc(), - splitExpectedOutputData2.data()); - - InferenceEngine::BlobMap outputBlobs; - - // Reshape - InferenceEngine::TBlob::Ptr splitOutputBlob1 = InferenceEngine::make_shared_blob(splitOutputItem1.second->getTensorDesc()); - splitOutputBlob1->allocate(); - outputBlobs[splitOutputItem1.first] = splitOutputBlob1; - - // Split - InferenceEngine::TBlob::Ptr splitOutputBlob2 = InferenceEngine::make_shared_blob(splitOutputItem2.second->getTensorDesc()); - splitOutputBlob2->allocate(); - outputBlobs[splitOutputItem2.first] = splitOutputBlob2; - - const InferenceEngine::BlobMap inputsBlobMap = { std::pair("data", inputBlob) }; - graph.Infer(inputsBlobMap, outputBlobs); - - compare(*splitOutputBlob1, *splitExpectedOutputBlob1); - compare(*splitOutputBlob2, *splitExpectedOutputBlob2); -} - -TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithFakeOutput) { - std::string modelTemplate = R"V0G0N( - - - - - - 1 - 2 - 8 - 8 - - - - - - - - 1 - 2 - 8 - 8 - - - - - 1 - 1 - 8 - 8 - - - 1 - 1 - 8 - 8 - - - - - - - - 1 - 1 - 8 - 8 - - - - - 1 - 8 - 8 - - - - - - - - - -)V0G0N"; - - const size_t bufferForValues = 1024; - std::vector model(modelTemplate.size() + bufferForValues); - - const size_t batchHeight = 8; - const size_t batchWidth = 8; - const InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::FP32, { 1, 2, batchHeight, batchWidth }, InferenceEngine::NCHW); - const size_t batchSize = batchHeight * batchWidth; - const float channel1Value = 1.0; - const float channel2Value = 2.0; - - InferenceEngine::Blob::Ptr inputBlob = 
InferenceEngine::make_shared_blob(tensorDesc); - inputBlob->allocate(); - float* inputData = inputBlob->buffer().as(); - for (size_t i = 0; i < inputBlob->size(); i++) { - inputData[i] = (i < batchSize) ? channel1Value : channel2Value; - } - - for (int splitFromPortNumber = 1; splitFromPortNumber <= 2; ++splitFromPortNumber) { - sprintf(model.data(), modelTemplate.c_str(), splitFromPortNumber); - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, { 228 }, InferenceEngine::C }); - weights->allocate(); - float* weightsData = weights->buffer(); - for (size_t i = 0ULL; i < weights->size() / sizeof(float); i++) { - weightsData[i] = 1.0; - } - - const InferenceEngine::TBlob::Ptr weightsPtr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(&model[0], weightsPtr)); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo(); - const std::pair reshapeOutputItem = std::make_pair("Reshape", outputs["Reshape"]); - const std::string splitOutputName = std::string("Split.") + (splitFromPortNumber == 1 ? "1" : "0"); - const std::pair splitOutputItem = std::make_pair(splitOutputName, outputs[splitOutputName]); - - std::vector reshapeExpectedOutputData(batchSize); - std::vector splitExpectedOutputData(batchSize); - for (size_t i = 0; i < reshapeExpectedOutputData.size(); i++) { - reshapeExpectedOutputData[i] = (splitFromPortNumber == 1) ? 1.0 : 2.0; - splitExpectedOutputData[i] = (splitFromPortNumber == 1) ? 2.0 : 1.0; - } - const InferenceEngine::TBlob::Ptr reshapeExpectedOutputBlob = InferenceEngine::make_shared_blob( - reshapeOutputItem.second->getTensorDesc(), - reshapeExpectedOutputData.data()); - const InferenceEngine::TBlob::Ptr splitExpectedOutputBlob = InferenceEngine::make_shared_blob( - splitOutputItem.second->getTensorDesc(), - splitExpectedOutputData.data()); - - InferenceEngine::BlobMap outputBlobs; - - // Reshape - InferenceEngine::TBlob::Ptr reshapeOutputBlob = InferenceEngine::make_shared_blob(reshapeOutputItem.second->getTensorDesc()); - reshapeOutputBlob->allocate(); - outputBlobs[reshapeOutputItem.first] = reshapeOutputBlob; - - // Split - InferenceEngine::TBlob::Ptr splitOutputBlob = InferenceEngine::make_shared_blob(splitOutputItem.second->getTensorDesc()); - splitOutputBlob->allocate(); - outputBlobs[splitOutputItem.first] = splitOutputBlob; - - const InferenceEngine::BlobMap inputsBlobMap = { std::pair("data", inputBlob) }; - graph.Infer(inputsBlobMap, outputBlobs); - - compare(*reshapeOutputBlob, *reshapeExpectedOutputBlob); - compare(*splitOutputBlob, *splitExpectedOutputBlob); - } -} - -TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithMultipleData) { - std::string model = R"V0G0N( - - - - - - 1 - 2 - 8 - 8 - - - - - - - - 1 - 2 - 8 - 8 - - - - - 1 - 1 - 8 - 8 - - - 1 - 1 - 8 - 8 - - - - - - - - 1 - 1 - 8 - 8 - - - - - 1 - 8 - 8 - - - - - - - - 1 - 1 - 8 - 8 - - - - - 1 - 8 - 8 - - - - - - - - 1 - 1 - 8 - 8 - - - - - 1 - 8 - 8 - - - - - - - - - - - -)V0G0N"; - - const size_t batchHeight = 8; - const size_t batchWidth = 8; - const InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::FP32, { 1, 2, batchHeight, batchWidth }, InferenceEngine::NCHW); - const size_t batchSize = batchHeight * batchWidth; - const float channel1Value = 1.0; - const float channel2Value = 2.0; - - InferenceEngine::Blob::Ptr inputBlob = 
InferenceEngine::make_shared_blob(tensorDesc); - inputBlob->allocate(); - float* inputData = inputBlob->buffer().as(); - for (size_t i = 0; i < inputBlob->size(); i++) { - inputData[i] = (i < batchSize) ? channel1Value : channel2Value; - } - - InferenceEngine::TBlob *weights = new InferenceEngine::TBlob({ InferenceEngine::Precision::U8, { 228 }, InferenceEngine::C }); - weights->allocate(); - float* weightsData = weights->buffer(); - for (size_t i = 0ULL; i < weights->size() / sizeof(float); i++) { - weightsData[i] = 1.0; - } - - const InferenceEngine::TBlob::Ptr weightsPtr = InferenceEngine::TBlob::Ptr(weights); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weightsPtr)); - - network.addOutput("split"); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(network); - - const auto& nodes = graph.getNodes(); - ASSERT_EQ(nodes.size(), 12); - ASSERT_EQ(nodes[0]->getType(), MKLDNNPlugin::Type::Input); - ASSERT_EQ(nodes[1]->getType(), MKLDNNPlugin::Type::Split); - ASSERT_EQ(nodes[2]->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[3]->getType(), MKLDNNPlugin::Type::Reshape); - ASSERT_EQ(nodes[4]->getType(), MKLDNNPlugin::Type::Output); - ASSERT_EQ(nodes[5]->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[6]->getType(), MKLDNNPlugin::Type::Reshape); - ASSERT_EQ(nodes[7]->getType(), MKLDNNPlugin::Type::Output); - ASSERT_EQ(nodes[8]->getType(), MKLDNNPlugin::Type::Reorder); - ASSERT_EQ(nodes[9]->getType(), MKLDNNPlugin::Type::Reshape); - ASSERT_EQ(nodes[10]->getType(), MKLDNNPlugin::Type::Output); - ASSERT_EQ(nodes[11]->getType(), MKLDNNPlugin::Type::Output); - - InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo(); - std::vector> outputItems = { - std::make_pair("reshape1", outputs.find("reshape1")->second), - std::make_pair("reshape2", outputs.find("reshape2")->second), - std::make_pair("reshape3", outputs.find("reshape3")->second), - std::make_pair("split.0", outputs.find("split.0")->second) - }; - - std::vector> expectedOutputData = { - std::vector(batchSize), - std::vector(batchSize), - std::vector(batchSize), - std::vector(batchSize) - }; - for (size_t i = 0; i < batchSize; i++) { - expectedOutputData[0][i] = channel1Value; - expectedOutputData[1][i] = channel1Value; - expectedOutputData[2][i] = channel2Value; - - expectedOutputData[3][i] = channel1Value; - } - - std::vector::Ptr> expectedOutputBlobs(outputs.size()); - for (size_t i = 0; i < outputs.size(); i++) { - expectedOutputBlobs[i] = InferenceEngine::make_shared_blob( - outputItems[i].second->getTensorDesc(), - expectedOutputData[i].data()); - } - - std::vector::Ptr> outputBlobs; - outputBlobs.reserve(outputItems.size()); - - InferenceEngine::BlobMap outputBlobsMap; - for(const std::pair& item : outputItems) { - InferenceEngine::TBlob::Ptr blob = InferenceEngine::make_shared_blob(item.second->getTensorDesc()); - outputBlobs.push_back(blob); - blob->allocate(); - outputBlobsMap[item.first] = blob; - } - - const InferenceEngine::BlobMap inputsBlobMap = { std::pair("data", inputBlob) }; - graph.Infer(inputsBlobMap, outputBlobsMap); - - for(size_t i = 0; i < 3; i++) { - compare(*outputBlobs[i], *expectedOutputBlobs[i]); - } -} - -TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithMultipleData_2) { - std::string model = R"V0G0N( - - - - - - 1 - 2 - 8 - 8 - - - - - - - - 1 - 2 - 8 - 8 - - - - - 1 - 1 - 8 - 8 - - - 1 - 1 - 8 - 8 - - - - - - - - 1 - 1 - 8 - 8 - - - - - 1 - 1 - 8 - 8 - - - - - - - - - -)V0G0N"; - using namespace 
InferenceEngine; - - const size_t H = 8; - const size_t W = 8; - const size_t imgSz = H * W; - const float channel1Value = 1.0; - const float channel2Value = 2.0; - - const auto weights = make_shared_blob(TensorDesc(Precision::U8, SizeVector{0}, Layout::C)); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork net; - ASSERT_NO_THROW(net = core.ReadNetwork(model, weights)); - - net.addOutput("split", 0); - - MKLDNNGraphTestClass graph; - graph.CreateGraph(net); - - auto inBlob = make_shared_blob({ Precision::FP32, SizeVector{1, 2, H, W}, Layout::NCHW }); - auto outBlob1 = make_shared_blob({ Precision::FP32, SizeVector{1, 1, H, W}, Layout::NCHW }); - auto outBlob2 = make_shared_blob({ Precision::FP32, SizeVector{1, 1, H, W}, Layout::NCHW }); - auto outBlob3 = make_shared_blob({ Precision::FP32, SizeVector{1, 1, H, W}, Layout::NCHW }); - - inBlob->allocate(); - outBlob1->allocate(); - outBlob2->allocate(); - outBlob3->allocate(); - - auto in_ptr = inBlob->buffer().as(); - for (int i = 0; i < imgSz; i++) { - in_ptr[i] = channel1Value; - in_ptr[i + imgSz] = channel2Value; - } - - BlobMap inputBlobMap = { {"data" , inBlob } }, - outputBlobMap = { {"split.0", outBlob1}, - {"split.1", outBlob2}, - {"power" , outBlob3} }; - - graph.Infer(inputBlobMap, outputBlobMap); - - auto out_check = [] ( Blob::Ptr blob, float val) { - auto size = blob->size(); - auto ptr = blob->buffer().as(); - bool res = true; - for (int i = 0; i < size; i++) - res &= ( std::abs( ptr[i] - val ) < 0.00001f ); - return res; - }; - - EXPECT_TRUE(out_check(outBlob1, 1)); - EXPECT_TRUE(out_check(outBlob2, 2)); - EXPECT_TRUE(out_check(outBlob3, -1)); -} - -TEST_F(MKLDNNGraphStructureTests, TestCreateGraphAllDataToConcat) { - std::shared_ptr function; - { - ngraph::element::Type elementType = ngraph::element::Type_t::f32; - ngraph::Shape shape { 1, 1, 4, 5 }; - auto input = std::make_shared(elementType, shape); - input->set_friendly_name("input"); - - auto weights1 = std::make_shared( - elementType, ngraph::Shape{1, 1, 1, 1}, std::vector(1, 2.0f)); - auto conv1 = std::make_shared( - input, weights1, ngraph::Strides { 1, 1 }, - ngraph::CoordinateDiff { 0, 0 }, ngraph::CoordinateDiff{0, 0}, ngraph::Strides { 1, 1 }); - - auto weights2 = std::make_shared( - elementType, ngraph::Shape{1, 1, 1, 1}, std::vector(1, 3.0f)); - auto conv2 = std::make_shared( - input, weights2, ngraph::Strides { 1, 1 }, - ngraph::CoordinateDiff { 0, 0 }, ngraph::CoordinateDiff{0, 0}, ngraph::Strides { 1, 1 }); - - auto concat = std::make_shared(ngraph::OutputVector { input, conv1, conv2 }, 1); - concat->set_friendly_name("concat"); - auto result = std::make_shared(concat); - - ngraph::ResultVector results { result }; - ngraph::ParameterVector params { input }; - function = std::make_shared(results, params); - } - - auto cnn = InferenceEngine::CNNNetwork(function); - - // Load the network - std::vector inpSize = {1, 1, 4, 5}; - std::vector outSize = {1, 3, 4, 5}; - - InferenceEngine::BlobMap inputBlobs; - InferenceEngine::BlobMap outputBlobs; - - std::vector inpData(4*5, 1); - std::vector outData(3*4*5, 1); - for (int i = 0; i < 4*5; ++i) { - inpData[i] = i; - } - - inputBlobs["input"] = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, inpSize, - InferenceEngine::TensorDesc::getLayoutByDims(inpSize) }, &inpData[0]); - outputBlobs["concat"] = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, outSize, - InferenceEngine::TensorDesc::getLayoutByDims(outSize) }, &outData[0]); - - MKLDNNGraphTestClass graph; - 
graph.CreateGraph(cnn); - graph.Infer(inputBlobs, outputBlobs); - - std::vector refDst = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, - 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57}; - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(outputBlobs["concat"]->getTensorDesc(), refDst.data()); - - compare(*outputBlobs["concat"], *dstOut); -} - -TEST_F(MKLDNNGraphStructureTests, TestCreateGraphAllDataFromInputToConcat) { - std::shared_ptr function; - { - ngraph::element::Type elementType = ngraph::element::Type_t::f32; - auto input = std::make_shared(elementType, ngraph::Shape { 1, 1, 4, 5 }); - input->set_friendly_name("input"); - auto concat = std::make_shared(ngraph::OutputVector { input, input, input }, 1); - concat->set_friendly_name("concat"); - auto result = std::make_shared(concat); - - ngraph::ResultVector results { result }; - ngraph::ParameterVector params { input }; - function = std::make_shared(results, params); - } - - auto cnn = InferenceEngine::CNNNetwork(function); - - // Load the network - std::vector inpSize = {1, 1, 4, 5}; - std::vector outSize = {1, 3, 4, 5}; - - InferenceEngine::BlobMap inputBlobs; - InferenceEngine::BlobMap outputBlobs; - - std::vector inpData(4*5, 1); - std::vector outData(3*4*5, 1); - for (int i = 0; i < 4*5; ++i) - { - inpData[i] = i; - } - - inputBlobs["input"] = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, inpSize, - InferenceEngine::TensorDesc::getLayoutByDims(inpSize) }, &inpData[0]); - outputBlobs["concat"] = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, outSize, - InferenceEngine::TensorDesc::getLayoutByDims(outSize) }, &outData[0]); - - - MKLDNNGraphTestClass graph; - graph.CreateGraph(cnn); - graph.Infer(inputBlobs, outputBlobs); - - std::vector refDst = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,}; - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(outputBlobs["concat"]->getTensorDesc(), refDst.data()); - - compare(*outputBlobs["concat"], *dstOut); -} - - -TEST_F(MKLDNNGraphStructureTests, TestCheckIncorrectScaleShift) { - std::string model = R"V0G0N( - - - - - - 1 - 1000 - 16 - - - - - - - 1 - 1000 - 16 - - - - - 1 - 100 - 16 - - - - - - - - - - - - -)V0G0N"; - using namespace InferenceEngine; - auto weights = make_shared_blob(TensorDesc(Precision::U8, SizeVector{64}, Layout::C)); - weights->allocate(); - - InferenceEngine::Core core; - InferenceEngine::CNNNetwork network; - ASSERT_NO_THROW(network = core.ReadNetwork(model, weights)); - - MKLDNNGraphTestClass graph; - ASSERT_THROW(graph.CreateGraph(network), InferenceEngine::Exception); -} - -TEST_F(MKLDNNGraphStructureTests, TestConcatWithFourInputs) { - std::shared_ptr function; - { - ngraph::element::Type elementType = ngraph::element::Type_t::f32; - ngraph::Shape shape { 1, 1, 4, 5 }; - auto input = std::make_shared(elementType, shape); - input->set_friendly_name("input"); - - auto weights1 = std::make_shared( - elementType, ngraph::Shape{1, 1, 1, 1}, std::vector(1, 2.0f)); - auto conv1 = std::make_shared( - input, weights1, ngraph::Strides { 1, 1 }, - ngraph::CoordinateDiff { 0, 0 }, ngraph::CoordinateDiff{0, 0}, ngraph::Strides { 1, 1 }); - - auto weights2 = std::make_shared( - elementType, 
ngraph::Shape{1, 1, 1, 1}, std::vector(1, 3.0f)); - auto conv2 = std::make_shared( - input, weights2, ngraph::Strides { 1, 1 }, - ngraph::CoordinateDiff { 0, 0 }, ngraph::CoordinateDiff{0, 0}, ngraph::Strides { 1, 1 }); - - auto weights3 = std::make_shared( - elementType, ngraph::Shape{1, 1, 1, 1}, std::vector(1, -1.0f)); - auto conv3 = std::make_shared( - input, weights3, ngraph::Strides { 1, 1 }, - ngraph::CoordinateDiff { 0, 0 }, ngraph::CoordinateDiff{0, 0}, ngraph::Strides { 1, 1 }); - - auto concat = std::make_shared(ngraph::OutputVector { input, conv1, conv2, conv3 }, 1); - concat->set_friendly_name("concat"); - auto result = std::make_shared(concat); - - ngraph::ResultVector results { result }; - ngraph::ParameterVector params { input }; - function = std::make_shared(results, params); - } - - auto cnn = InferenceEngine::CNNNetwork(function); - - // Load the network - std::vector inpSize = {1, 1, 4, 5}; - std::vector outSize = {1, 4, 4, 5}; - - InferenceEngine::BlobMap inputBlobs; - InferenceEngine::BlobMap outputBlobs; - - std::vector inpData(4*5, 1); - std::vector outData(4*4*5, 1); - for (int i = 0; i < 4*5; ++i) { - inpData[i] = i; - } - - inputBlobs["input"] = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, inpSize, - InferenceEngine::TensorDesc::getLayoutByDims(inpSize) }, &inpData[0]); - outputBlobs["concat"] = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, outSize, - InferenceEngine::TensorDesc::getLayoutByDims(outSize) }, &outData[0]); - - - MKLDNNGraphTestClass graph; - graph.CreateGraph(cnn); - graph.Infer(inputBlobs, outputBlobs); - - std::vector refDst = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, - 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, - 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19}; - - InferenceEngine::TBlob::Ptr dstOut = InferenceEngine::make_shared_blob(outputBlobs["concat"]->getTensorDesc(), refDst.data()); - - compare(*outputBlobs["concat"], *dstOut); -} diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp deleted file mode 100644 index 7a341f15f22..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -// WA for windows.h -#ifdef _WIN32 -# ifndef NOMINMAX -# define NOMINMAX -# endif -# ifndef _WINSOCKAPI_ -# define _WINSOCKAPI_ -# endif -# ifndef _WINSOCK2API_ -# define _WINSOCK2API_ -# endif -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define GARB_VAL(x) ((x + 100.0f + sin(x)) / (x + 150.f)) - -class MKLDNNGraphTestClass: public MKLDNNPlugin::MKLDNNGraph { -private: - MKLDNNPlugin::MKLDNNExtensionManager::Ptr extensionManager = std::make_shared(); - -public: - enum class CheckDynBatchType { - Both, - Parent, - Child - }; - MKLDNNGraphTestClass(): MKLDNNPlugin::MKLDNNGraph() { - auto defaultExtensions = std::make_shared(); - extensionManager->AddExtension(defaultExtensions); - - } - virtual ~MKLDNNGraphTestClass() = default; - - static std::string getStrPrimitiveDescriptorType(MKLDNNPlugin::impl_desc_type type) { - std::string str_type; - - auto add_type = [&](std::string 
t) { - if (!str_type.empty() && t.c_str()[0] != '_') - str_type += "_"; - str_type += t; - }; - -#define SEARCH_TYPE(_type) \ - if ((type & MKLDNNPlugin::impl_desc_type::_type) == MKLDNNPlugin::impl_desc_type::_type) \ - add_type(#_type) - - SEARCH_TYPE(undef); - SEARCH_TYPE(reorder); - SEARCH_TYPE(jit); - SEARCH_TYPE(gemm); - SEARCH_TYPE(ref); - - SEARCH_TYPE(avx512); - SEARCH_TYPE(avx2); - SEARCH_TYPE(sse42); - SEARCH_TYPE(blas); - SEARCH_TYPE(any); - - SEARCH_TYPE(winograd); - SEARCH_TYPE(_dw); - SEARCH_TYPE(_1x1); - - if (type == MKLDNNPlugin::impl_desc_type::unknown) - str_type = "unknown"; - else if (str_type.empty()) - str_type = "undef"; - return str_type; - } - - void PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in, int batch) { - if (!IsReady()) IE_THROW()<< "Wrong state. Topology not ready."; - - auto input = inputNodes.find(name); - if (input != inputNodes.end()) { - MKLDNNPlugin::MKLDNNDims outDims; - if(input->second->getChildEdgeAt(0)->getDims().ndims() == 0 ) - outDims = MKLDNNPlugin::MKLDNNDims(InferenceEngine::SizeVector(1,1)); - else - outDims = input->second->getChildEdgeAt(0)->getDims(); - if (batch < 1) - batch = outDims[0]; - - const void *ext_data_ptr = in->cbuffer(); - void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData(); - - if (ext_data_ptr != inter_data_ptr) { - MKLDNNPlugin::MKLDNNMemoryDesc ext_tdesc(in->getTensorDesc()); - - if (ext_tdesc.getDims().ndims() == 0) { - ext_tdesc = MKLDNNPlugin::MKLDNNMemoryDesc{ {1}, ext_tdesc.getDataType(), mkldnn::memory::format_tag::a}; - } - - MKLDNNPlugin::MKLDNNMemory ext_mem(eng); - ext_mem.Create(ext_tdesc, ext_data_ptr, false); - - input->second->getChildEdgeAt(0)->getMemory().SetData(ext_mem, in->byteSize() / outDims[0] * batch, false); - } - - // todo: make sure 'name' exists in this map... - if (_meanImages.find(name) != _meanImages.end()) { - if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - _meanImages[name].Subtract(outDims, reinterpret_cast(inter_data_ptr), in->getTensorDesc().getLayout()); - } else { - IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported"; - } - } - } else { - IE_THROW() << "Input blob for infer '" << name << "' doesn't correspond to input in network"; - } - } - - void Infer(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result, int batch = -1) { - try { - // need to retain converted blobs until infer finish - std::vector convertedInputs; - for (auto input : inputs) { - switch (input.second->getTensorDesc().getPrecision()) { - case InferenceEngine::Precision::FP32: { - InferenceEngine::TBlob *in_f = nullptr; - in_f = dynamic_cast *>(input.second.get()); - if (in_f == nullptr) { - FAIL() << "Input data precision not supported. Expected float."; - } - - if (in_f->readOnly() == nullptr) { - IE_THROW() << "Input data was not allocated."; - } - } - break; - case InferenceEngine::Precision::I32: { - InferenceEngine::TBlob *in_f = nullptr; - in_f = dynamic_cast *>(input.second.get()); - if (in_f == nullptr) { - FAIL() << "Input data precision not supported. Expected float."; - } - - if (in_f->readOnly() == nullptr) { - IE_THROW() << "Input data was not allocated."; - } - } - break; - case InferenceEngine::Precision::U16: { - InferenceEngine::TBlob *in_f = nullptr; - in_f = dynamic_cast *>(input.second.get()); - if (in_f == nullptr) { - FAIL() << "Input data precision not supported. 
Expected float."; - } - - if (in_f->readOnly() == nullptr) { - IE_THROW() << "Input data was not allocated."; - } - } - break; - case InferenceEngine::Precision::I16: { - InferenceEngine::TBlob *in_f = nullptr; - in_f = dynamic_cast *>(input.second.get()); - if (in_f == nullptr) { - FAIL() << "Input data precision not supported. Expected float."; - } - - if (in_f->readOnly() == nullptr) { - IE_THROW() << "Input data was not allocated."; - } - } - break; - case InferenceEngine::Precision::U8: { - InferenceEngine::TBlob *in_f = nullptr; - in_f = dynamic_cast *>(input.second.get()); - if (in_f == nullptr) { - FAIL() << "Input data precision not supported. Expected float."; - } - - if (in_f->readOnly() == nullptr) { - IE_THROW() << "Input data was not allocated."; - } - } - break; - case InferenceEngine::Precision::I8: { - InferenceEngine::TBlob *in_f = nullptr; - in_f = dynamic_cast *>(input.second.get()); - if (in_f == nullptr) { - FAIL() << "Input data precision not supported. Expected float."; - } - - if (in_f->readOnly() == nullptr) { - IE_THROW() << "Input data was not allocated."; - } - } - break; - default: - IE_THROW() << "Unsupported input precision " << input.second->getTensorDesc().getPrecision(); - } - - PushInputData(input.first, input.second, batch); - } - MKLDNNPlugin::MKLDNNGraph::Infer(nullptr, batch); - } catch (const std::exception &e) { - FAIL() << e.what(); - } - - PullOutputData(result); - } - - std::vector& getNodes() { - return graphNodes; - } - - void MoveInternalBlobsToConstLayers(InferenceEngine::details::CNNNetworkImpl* netImpl) { - auto createConstInputTo = [&](InferenceEngine::CNNLayerPtr layer, InferenceEngine::Blob::Ptr blob, std::string name) { - InferenceEngine::LayerParams attrs = {layer.get()->name + "_const_" + name, "Const", InferenceEngine::Precision::FP32}; - auto constLayer = std::make_shared(attrs); - constLayer->blobs["custom"] = blob; - - std::vector constDims(layer->insData[0].lock()->getDims().size(), 1); - if (constDims.size() > 1) - constDims[1] = blob.get()->size(); - else - constDims[0] = blob.get()->size(); - const InferenceEngine::TensorDesc& td = {InferenceEngine::Precision::FP32, constDims, InferenceEngine::TensorDesc::getLayoutByDims(constDims)}; - - InferenceEngine::DataPtr newEdgeAfterLayer(new InferenceEngine::Data(constLayer->name, td)); - newEdgeAfterLayer->setName(constLayer->name); - getCreatorLayer(newEdgeAfterLayer) = constLayer; - getInputTo(newEdgeAfterLayer).clear(); - - - netImpl->addData(constLayer->name.c_str(), newEdgeAfterLayer); - IE_SUPPRESS_DEPRECATED_START - netImpl->addLayer(constLayer); - IE_SUPPRESS_DEPRECATED_END - - constLayer->outData.push_back(newEdgeAfterLayer); - getInputTo(newEdgeAfterLayer)[layer->name] = layer; - layer->insData.push_back(newEdgeAfterLayer); - }; - - auto all_layers = InferenceEngine::details::CNNNetSortTopologically( - InferenceEngine::CNNNetwork(netImpl->shared_from_this())); - for (auto &layer : all_layers) { - if (layer->type == "ScaleShift" && layer->insData.size() == 1) { - InferenceEngine::Blob::Ptr scalesBlob = layer->blobs["weights"]; - if (scalesBlob != nullptr) - createConstInputTo(layer, scalesBlob, "weights"); - - InferenceEngine::Blob::Ptr shiftBlob = layer->blobs["biases"]; - if (shiftBlob != nullptr) - createConstInputTo(layer, shiftBlob, "biases"); - } else if (layer->type == "PReLU" && layer->insData.size() == 1) { - InferenceEngine::Blob::Ptr scalesBlob = layer->blobs["weights"]; - if (scalesBlob != nullptr) - createConstInputTo(layer, scalesBlob, "weights"); - } - } - } 
- - void CreateGraph(InferenceEngine::CNNNetwork &network, const MKLDNNPlugin::MKLDNNExtensionManager::Ptr& extMgr, - MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache = {}) { - if (network.getFunction()) { - auto convertedNetwork = std::make_shared(network); - MoveInternalBlobsToConstLayers(convertedNetwork.get()); - MKLDNNGraph::CreateGraph(InferenceEngine::CNNNetwork(convertedNetwork), extMgr, cache); - } else { - auto & icnnnet = static_cast(network); - InferenceEngine::details::CNNNetworkImpl* netImpl = static_cast(&icnnnet); - MoveInternalBlobsToConstLayers(netImpl); - MKLDNNGraph::CreateGraph(network, extMgr, cache); - } - } - - void CreateGraph(InferenceEngine::CNNNetwork &network) { - MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache; - if (network.getFunction()) { - auto convertedNetwork = std::make_shared(network); - MoveInternalBlobsToConstLayers(convertedNetwork.get()); - MKLDNNGraph::CreateGraph(InferenceEngine::CNNNetwork(convertedNetwork), extensionManager, cache); - } else { - auto & icnnnet = static_cast(network); - InferenceEngine::details::CNNNetworkImpl* netImpl = static_cast(&icnnnet); - MoveInternalBlobsToConstLayers(netImpl); - MKLDNNGraph::CreateGraph(network, extensionManager, cache); - } - } - - void checkDynBatch(InferenceEngine::BlobMap& srcs, InferenceEngine::BlobMap& outputBlobs, int batch, size_t MB, - const std::function& comp, CheckDynBatchType type = CheckDynBatchType::Both) { - for (auto &node : getNodes()) { - if (comp(node)) { - auto inputBlob = node->getParentEdgeAt(0)->getBlob(); - auto *data = inputBlob->buffer().as(); - size_t dataSize = inputBlob->getTensorDesc().getBlockingDesc().getStrides()[0] * MB; - for (size_t j = 0; j < dataSize; j++) { - data[j] = GARB_VAL(j); - } - - auto outputBlob = node->getChildEdgeAt(0)->getBlob(); - data = outputBlob->buffer().as(); - dataSize = outputBlob->getTensorDesc().getBlockingDesc().getStrides()[0] * MB; - for (size_t j = 0; j < dataSize; j++) { - data[j] = GARB_VAL(j); - } - } - } - - Infer(srcs, outputBlobs, batch); - - for (auto &node : getNodes()) { - if (comp(node)) { - auto inputBlob = node->getParentEdgeAt(0)->getBlob(); - auto *data = inputBlob->buffer().as(); - auto inputNoBatchSize = inputBlob->getTensorDesc().getBlockingDesc().getStrides()[0]; - for (size_t i = 0; i < batch; i++) { - for (size_t j = 0; j < inputNoBatchSize; j++) { - ASSERT_NE(data[i*inputNoBatchSize + j], GARB_VAL(i*inputNoBatchSize + j)); - } - } - - if (type == CheckDynBatchType::Both || type == CheckDynBatchType::Parent) { - for (size_t i = static_cast(batch); i < MB; i++) { - for (size_t j = 0; j < inputNoBatchSize; j++) { - ASSERT_NEAR(data[i * inputNoBatchSize + j], - GARB_VAL(i * inputNoBatchSize + j), 0.001f); - } - } - } - - auto outputBlob = node->getChildEdgeAt(0)->getBlob(); - data = outputBlob->buffer().as(); - auto outputNoBatchSize = outputBlob->getTensorDesc().getBlockingDesc().getStrides()[0]; - for (size_t i = 0; i < batch; i++) { - for (size_t j = 0; j < outputNoBatchSize; j++) { - ASSERT_NE(data[i*outputNoBatchSize + j], GARB_VAL(i*outputNoBatchSize + j)); - } - } - if (type == CheckDynBatchType::Both || type == CheckDynBatchType::Child) { - for (size_t i = static_cast(batch); i < MB; i++) { - for (size_t j = 0; j < outputNoBatchSize; j++) { - ASSERT_NEAR(data[i * outputNoBatchSize + j], - GARB_VAL(i * outputNoBatchSize + j), 0.001f); - } - } - } - } - } - } -}; diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/mkldnn_primitive_test.cpp 
b/inference-engine/tests_deprecated/unit/engines/mkldnn/mkldnn_primitive_test.cpp deleted file mode 100644 index 743f433131c..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/mkldnn_primitive_test.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include "mkldnn_memory.h" -#include "mkldnn_graph.h" - -using namespace std; -using namespace MKLDNNPlugin; -using namespace mkldnn; -using namespace ::testing; - -class MKLDNNPrimitiveTest : public ::testing::Test { -protected: - virtual void TearDown() override{ - } - - virtual void SetUp() override{ - } -}; - -//class ChildConv : public MKLDNNConvolution { -// public: -// explicit ChildConv(const engine& eng) : MKLDNNConvolution(eng) {} -// // Add the following two lines to the mock class. -// MOCK_METHOD0(die, void()); -// ~ChildConv () { die(); } -//}; - - -TEST_F(MKLDNNPrimitiveTest, DISABLED_canDeleteWeightInweitableLayer) { - //simulate how convlayer gets created - engine e(engine::kind::cpu, 0); - //auto node = MKLDNNPlugin::MKLDNNNodePtr(MKLDNNPlugin::MKLDNNNode::factory().create(MKLDNNPlugin::Generic, InferenceEngine::Precision::FP32, "")); -// ChildConv *conv = new ChildConv(e); -// EXPECT_CALL(*conv, die()).Times(1); - - std::vector weights = {1,2,3,4}; - std::vector weightsData = {(void*)&*weights.begin()}; - std::vector weightsSize = {weights.size() * sizeof(float)}; - - memory::dims dims(4); - dims[0] = weights.size(); - -// conv->CreateWeightsMemory(dims, memory::f32, memory::nchw); -// conv->SetWeights(weightsData, weightsSize); - FAIL() << "Should change the test"; -// node->SetPrimitive(conv); -// node.reset(); - -// Mock::VerifyAndClear(conv); -} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/test_layers.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/test_layers.cpp deleted file mode 100644 index e72cab66708..00000000000 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/test_layers.cpp +++ /dev/null @@ -1,183 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -//#include -//#include "mkldnn_layers.h" -// -//using namespace std; -// -//class MKLDNNLayersTests : public ::testing::Test { -//protected: -// virtual void TearDown() override{ -// } -// -// virtual void SetUp() override{ -// } -// -//}; -// -//TEST_F(MKLDNNLayersTests, canCreateContext) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// unique_ptr dl ( new MKLDNNPlugin::Context({}, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canCreateConvLayer) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// InferenceEngine::TBlob::Ptr blobPtr(new InferenceEngine::TBlob()); -// unique_ptr ctx ( new MKLDNNPlugin::Context(blobPtr, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::ConvolutionLayer convLayer({}); -// InferenceEngine::DataPtr dPtr(new InferenceEngine::Data("testData")); -// dPtr->dims = {0, 0, 0, 0}; -// -// convLayer.insData.push_back(dPtr); -// convLayer.outData.push_back(dPtr); -// unique_ptr dl ( MKLDNNPlugin::LayerRegistry::CreateLayer(&convLayer, nullptr, dynamic_cast(ctx.get()))); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canCreateLRNLayer) { -// std::vector sd; -// 
std::vector dd; -// std::vector ds; -// unique_ptr ctx ( new MKLDNNPlugin::Context({}, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::NormLayer normLayer({}); -// InferenceEngine::DataPtr dPtr(new InferenceEngine::Data("testData")); -// dPtr->dims = {1, 1, 27, 27}; -// -// normLayer.insData.push_back(dPtr); -// normLayer.outData.push_back(dPtr); -// unique_ptr dl ( MKLDNNPlugin::LayerRegistry::CreateLayer(&normLayer, nullptr, dynamic_cast(ctx.get()))); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canCreatePoolingLayer) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// unique_ptr ctx ( new MKLDNNPlugin::Context({}, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::PoolingLayer poolingLayer({}); -// InferenceEngine::DataPtr dPtr(new InferenceEngine::Data("testData")); -// dPtr->dims = {1, 1, 27, 27}; -// -// poolingLayer.insData.push_back(dPtr); -// poolingLayer.outData.push_back(dPtr); -// unique_ptr dl ( MKLDNNPlugin::LayerRegistry::CreateLayer(&poolingLayer, nullptr, dynamic_cast(ctx.get()))); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canCreateSplitLayer) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// unique_ptr ctx ( new MKLDNNPlugin::Context({}, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::SplitLayer splitLayer({}); -// unique_ptr dl ( MKLDNNPlugin::LayerRegistry::CreateLayer(&splitLayer, nullptr, dynamic_cast(ctx.get()))); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canCreateConcatLayer) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// unique_ptr ctx ( new MKLDNNPlugin::Context({}, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::ConcatLayer concatLayer({}); -// unique_ptr dl ( MKLDNNPlugin::LayerRegistry::CreateLayer(&concatLayer, nullptr, dynamic_cast(ctx.get()))); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canCreateFullyConnectedLayer) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// InferenceEngine::TBlob::Ptr blobPtr(new InferenceEngine::TBlob()); -// unique_ptr ctx ( new MKLDNNPlugin::Context(blobPtr, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::FullyConnectedLayer fcLayer({}); -// InferenceEngine::DataPtr dPtr(new InferenceEngine::Data("testData")); -// dPtr->dims = {0, 0, 0, 0}; -// InferenceEngine::DataPtr dPtr2(new InferenceEngine::Data("testData2")); -// dPtr2->dims = {0, 0}; -// -// fcLayer.insData.push_back(dPtr); -// fcLayer.outData.push_back(dPtr2); -// unique_ptr dl ( MKLDNNPlugin::LayerRegistry::CreateLayer(&fcLayer, nullptr, dynamic_cast(ctx.get()))); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canCreateSoftMaxLayer) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// unique_ptr ctx ( new MKLDNNPlugin::Context({}, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::SoftMaxLayer softmaxLayer({}); -// InferenceEngine::DataPtr dPtr(new InferenceEngine::Data("testData")); 
-// dPtr->dims = {0, 0, 0, 0}; -// InferenceEngine::DataPtr dPtr2(new InferenceEngine::Data("testData2")); -// dPtr2->dims = {0, 0}; -// -// softmaxLayer.insData.push_back(dPtr); -// softmaxLayer.outData.push_back(dPtr2); -// unique_ptr dl ( MKLDNNPlugin::LayerRegistry::CreateLayer(&softmaxLayer, nullptr, dynamic_cast(ctx.get()))); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canCreateReLULayer) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// unique_ptr ctx ( new MKLDNNPlugin::Context({}, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::ReLULayer reLULayer({}); -// InferenceEngine::DataPtr dPtr(new InferenceEngine::Data("testData")); -// dPtr->dims = {1, 1, 27, 27}; -// -// reLULayer.insData.push_back(dPtr); -// reLULayer.outData.push_back(dPtr); -// unique_ptr dl ( MKLDNNPlugin::LayerRegistry::CreateLayer(&reLULayer, nullptr, dynamic_cast(ctx.get()))); -// -// ASSERT_NE(nullptr, dynamic_cast(dl.get())); -//} -// -//TEST_F(MKLDNNLayersTests, canNotCreateCNNLayer) { -// std::vector sd; -// std::vector dd; -// std::vector ds; -// unique_ptr ctx ( new MKLDNNPlugin::Context({}, mkldnn::engine(mkldnn::engine::cpu, 0), &sd, &dd, &ds)); -// ASSERT_NE(nullptr, dynamic_cast(ctx.get())); -// -// InferenceEngine::CNNLayer cnnLayer({}); -// EXPECT_THROW(MKLDNNPlugin::LayerRegistry::CreateLayer(&cnnLayer, nullptr, dynamic_cast(ctx.get())) , InferenceEngine::Exception); -//} -// -//TEST_F(MKLDNNLayersTests, canNotCreateLayerWithoutContext) { -// InferenceEngine::ConvolutionLayer convLayer({}); -// EXPECT_THROW(MKLDNNPlugin::LayerRegistry::CreateLayer(&convLayer, nullptr, nullptr), InferenceEngine::Exception); -//} \ No newline at end of file diff --git a/ngraph/core/include/ngraph/op/util/attr_types.hpp b/ngraph/core/include/ngraph/op/util/attr_types.hpp index 8f2b9078395..2525b96e818 100644 --- a/ngraph/core/include/ngraph/op/util/attr_types.hpp +++ b/ngraph/core/include/ngraph/op/util/attr_types.hpp @@ -336,6 +336,8 @@ namespace ngraph { return a.m_type == m_type && a.m_axis == m_axis; } + + bool operator!=(const AutoBroadcastSpec& a) const { return !(*this == a); } static const AutoBroadcastSpec NUMPY; static const AutoBroadcastSpec NONE; diff --git a/ngraph/core/src/op/binary_convolution.cpp b/ngraph/core/src/op/binary_convolution.cpp index fabe1d129a8..8460eee543d 100644 --- a/ngraph/core/src/op/binary_convolution.cpp +++ b/ngraph/core/src/op/binary_convolution.cpp @@ -117,7 +117,7 @@ bool op::v1::BinaryConvolution::visit_attributes(AttributeVisitor& visitor) namespace ngraph { template <> - EnumNames& + NGRAPH_API EnumNames& EnumNames::get() { static auto enum_names = EnumNames( diff --git a/ngraph/core/src/op/depth_to_space.cpp b/ngraph/core/src/op/depth_to_space.cpp index 4b97e199dea..761256d627e 100644 --- a/ngraph/core/src/op/depth_to_space.cpp +++ b/ngraph/core/src/op/depth_to_space.cpp @@ -238,7 +238,7 @@ bool op::DepthToSpace::evaluate(const HostTensorVector& outputs, namespace ngraph { template <> - EnumNames& + NGRAPH_API EnumNames& EnumNames::get() { static auto enum_names = EnumNames( diff --git a/ngraph/core/src/op/interpolate.cpp b/ngraph/core/src/op/interpolate.cpp index 3e3bf2a409c..43a966bbca5 100644 --- a/ngraph/core/src/op/interpolate.cpp +++ b/ngraph/core/src/op/interpolate.cpp @@ -581,7 +581,7 @@ namespace ngraph } template <> - EnumNames& + NGRAPH_API EnumNames& EnumNames::get() { static auto enum_names = EnumNames( @@ -606,7 +606,8 
@@ namespace ngraph } template <> - EnumNames& EnumNames::get() + NGRAPH_API EnumNames& + EnumNames::get() { static auto enum_names = EnumNames( "op::v4::Interpolate::NearestMode", diff --git a/ngraph/core/src/op/roi_align.cpp b/ngraph/core/src/op/roi_align.cpp index f02fb934c85..75aadc45b45 100644 --- a/ngraph/core/src/op/roi_align.cpp +++ b/ngraph/core/src/op/roi_align.cpp @@ -181,7 +181,8 @@ namespace ngraph constexpr DiscreteTypeInfo AttributeAdapter::type_info; template <> - EnumNames& EnumNames::get() + NGRAPH_API EnumNames& + EnumNames::get() { static auto enum_names = EnumNames("op::v3::ROIAlign::PoolingMode", diff --git a/ngraph/core/src/op/space_to_depth.cpp b/ngraph/core/src/op/space_to_depth.cpp index 288549e14f6..2532946d581 100644 --- a/ngraph/core/src/op/space_to_depth.cpp +++ b/ngraph/core/src/op/space_to_depth.cpp @@ -226,7 +226,7 @@ bool ngraph::op::v0::SpaceToDepth::evaluate(const HostTensorVector& outputs, namespace ngraph { template <> - EnumNames& + NGRAPH_API EnumNames& EnumNames::get() { static auto enum_names = EnumNames( diff --git a/ngraph/core/src/pass/convert_precision.cpp b/ngraph/core/src/pass/convert_precision.cpp index 987a69fcb35..2d5a0ab5eae 100644 --- a/ngraph/core/src/pass/convert_precision.cpp +++ b/ngraph/core/src/pass/convert_precision.cpp @@ -789,6 +789,11 @@ bool fuse_type_to_constant(const std::shared_ptr& node, new_const = change_constant_precision(constant); } + else if (from == element::bf16 && to == element::f32) + { + new_const = + change_constant_precision(constant); + } else if (from == element::f32 && to == element::f16) { new_const = diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index d97c735e970..21a44177a6f 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -48,9 +48,6 @@ xfail_issue_33581 = xfail_test(reason="RuntimeError: nGraph does not support the "GatherElements") xfail_issue_33633 = xfail_test(reason="MaxPool: dilations unsupported") xfail_issue_35911 = xfail_test(reason="Assertion error: Pad model mismatch error") -xfail_issue_35912 = xfail_test(reason="RuntimeError: Error of validate layer: B with type: " - "Pad. Cannot parse parameter pads_end from IR for layer B. 
" - "Value -1,0 cannot be casted to int.") xfail_issue_35923 = xfail_test(reason="RuntimeError: PReLU without weights is not supported") xfail_issue_35925 = xfail_test(reason="Assertion error - reduction ops results mismatch") xfail_issue_35927 = xfail_test(reason="RuntimeError: B has zero dimension that is not allowable") @@ -120,6 +117,8 @@ xfail_issue_46765 = xfail_test(reason="select_last_index attribute is not suppor xfail_issue_47323 = xfail_test(reason="RuntimeError: The plugin does not support FP64") xfail_issue_47337 = xfail_test(reason="RuntimeError: Unsupported dynamic ops: v1::OneHot") xfail_issue_33593 = xfail_test(reason="Current implementation of MaxPool doesn't support indices output") +xfail_issue_51993 = xfail_test(reason="PRelu supports only 1D tensor for 'slope' input broadcasted" + "by channel") # Model MSFT issues: xfail_issue_37957 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index 3b91a18344f..32442b0a375 100644 --- a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -60,7 +60,8 @@ from tests import (BACKEND_NAME, xfail_issue_49752, xfail_issue_49753, xfail_issue_49754, - xfail_issue_52463) + xfail_issue_52463, + xfail_issue_51993) def expect_fail(test_case_path, xfail): # type: (str) -> None @@ -383,6 +384,8 @@ tests_expected_to_fail = [ (xfail_issue_33593, "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_strides_cpu", "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_pads_cpu",), + (xfail_issue_51993, + "OnnxBackendNodeModelTest.test_prelu_broadcast_cpu",) ] for test_group in tests_expected_to_fail: diff --git a/ngraph/python/tests/test_onnx/test_ops_convpool.py b/ngraph/python/tests/test_onnx/test_ops_convpool.py index 720cc319cb9..7a507a2b4e0 100644 --- a/ngraph/python/tests/test_onnx/test_ops_convpool.py +++ b/ngraph/python/tests/test_onnx/test_ops_convpool.py @@ -8,7 +8,7 @@ from onnx.helper import make_graph, make_model, make_node, make_tensor_value_inf from tests.runtime import get_runtime from tests.test_onnx.utils import get_node_model, import_onnx_model, run_model, run_node -from tests import xfail_issue_35911, xfail_issue_35912 +from tests import xfail_issue_35911 @pytest.fixture @@ -307,7 +307,6 @@ def test_pad_opset_2(): run_model(model, [x]) -@xfail_issue_35912 def test_pad_negative_values_begin(): x = np.ones((2, 2), dtype=np.float32) @@ -322,7 +321,6 @@ def test_pad_negative_values_begin(): assert np.array_equal(ng_result, np.array([[1], [1]])) -@xfail_issue_35912 def test_pad_negative_values_end(): x = np.ones((2, 2), dtype=np.float32) diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp index a5de02359ee..e0fb314f1e9 100644 --- a/ngraph/test/onnx/onnx_import.in.cpp +++ b/ngraph/test/onnx/onnx_import.in.cpp @@ -2913,7 +2913,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_instance_normalization) 1.0739619f, 1.6918856f, 2.3098092f, 2.927733f, 3.5456567f, 4.1635804f, -4.130463f, -3.1876516f, -2.2448401f, -1.3020288f, -0.35921717f, 0.5835942f, 1.5264057f, 2.469217f, 3.4120288f, 4.35484f, 5.2976513f, 6.240463f}); - test_case.run(); + const size_t tolerance_bits = 3; + test_case.run(tolerance_bits); } NGRAPH_TEST(${BACKEND_NAME}, onnx_instance_normalization_dynamic) diff --git a/tests/stress_tests/.automation/memcheck_tests/precommit_configs/desktop_references_config.xml 
b/tests/stress_tests/.automation/memcheck_tests/precommit_configs/desktop_references_config.xml index 5bc91171c9d..3fd3c120fa1 100644 --- a/tests/stress_tests/.automation/memcheck_tests/precommit_configs/desktop_references_config.xml +++ b/tests/stress_tests/.automation/memcheck_tests/precommit_configs/desktop_references_config.xml @@ -10,13 +10,13 @@ - + - + - + # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 @@ -27,13 +27,13 @@ # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 - # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 + # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 - # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 + # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 - # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 + # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 # values from {"commit_id": "af63cb78ee5cbd66bac0d0980db61cb11b5d9995", "commit_date": "2021-03-03 15:44"} and *= 1.3 From 2c755aaf6f151b8d73af089466e1f6b2d9ced8c1 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 6 May 2021 20:08:42 +0300 Subject: [PATCH 70/73] Fix incorrect plural: childs -> children (#5532) --- .../legacy_api/include/legacy/graph_tools.hpp | 2 +- .../src/eltwise_base_transformation.cpp | 8 +++---- .../src/fuse_subtract_to_fake_quantize.cpp | 4 ++-- .../src/mkldnn_plugin/mkldnn_edge.cpp | 4 ++-- .../src/mkldnn_plugin/mkldnn_graph.cpp | 20 ++++++++--------- .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 10 ++++----- .../src/plugin_api/xml_parse_utils.h | 2 +- .../common/include/vpu/utils/perf_report.hpp | 2 +- .../src/backend/get_meta_data.cpp | 2 +- .../src/frontend/custom_layer.cpp | 8 +++---- .../src/utils/runtime_graph.cpp | 2 +- ..._with_different_precision_on_children.cpp} | 12 +++++----- ..._with_different_precision_on_children.cpp} | 8 +++---- ..._with_different_precision_on_children.cpp} | 8 +++---- ..._with_different_precision_on_children.hpp} | 12 +++++----- ..._with_different_precision_on_children.cpp} | 22 +++++++++---------- .../lpt_ngraph_functions/concat_function.hpp | 4 ++-- .../src/concat_function.cpp | 12 +++++----- 18 files changed, 71 insertions(+), 71 deletions(-) rename inference-engine/tests/functional/inference_engine/lp_transformations/{concat_with_different_precision_on_childs.cpp => 
concat_with_different_precision_on_children.cpp} (95%) rename inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/{concat_with_different_precision_on_childs.cpp => concat_with_different_precision_on_children.cpp} (87%) rename inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/{concat_with_different_precision_on_childs.cpp => concat_with_different_precision_on_children.cpp} (87%) rename inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/{concat_with_different_precision_on_childs.hpp => concat_with_different_precision_on_children.hpp} (74%) rename inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/{concat_with_different_precision_on_childs.cpp => concat_with_different_precision_on_children.cpp} (78%) diff --git a/inference-engine/src/legacy_api/include/legacy/graph_tools.hpp b/inference-engine/src/legacy_api/include/legacy/graph_tools.hpp index 202ada193f0..23358128003 100644 --- a/inference-engine/src/legacy_api/include/legacy/graph_tools.hpp +++ b/inference-engine/src/legacy_api/include/legacy/graph_tools.hpp @@ -182,7 +182,7 @@ inline void UnorderedDFS(std::unordered_set& visited, const Inference if (visitBefore) visit(cnnLayer); visited.insert(cnnLayer.get()); - // visit childs + // visit children for (auto& od : cnnLayer->outData) { for (auto nl : getInputTo(od)) { layers.push(nl.second); diff --git a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp index a119dcca64f..16084e611fe 100644 --- a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp @@ -118,11 +118,11 @@ int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr& eltwise) } if (fakeQuantize1 && fakeQuantize2) { - size_t childs1 = fakeQuantize1->get_output_target_inputs(0).size(); - size_t childs2 = fakeQuantize2->get_output_target_inputs(0).size(); - if (childs1 == 1 && childs2 > 1) + size_t children1 = fakeQuantize1->get_output_target_inputs(0).size(); + size_t children2 = fakeQuantize2->get_output_target_inputs(0).size(); + if (children1 == 1 && children2 > 1) return 0; - if (childs1 > 1 && childs2 == 1) + if (children1 > 1 && children2 == 1) return 1; } diff --git a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp index dc67cfe4110..6629bccc3c5 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp @@ -71,9 +71,9 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma return false; } - const auto childs = operation->get_output_target_inputs(0); + const auto children = operation->get_output_target_inputs(0); - for (const auto& target : childs) { + for (const auto& target : children) { const auto convolution = is_type(target.get_node()); const auto groupConvolution = is_type(target.get_node()); if (convolution || groupConvolution) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index f09ae78bfef..a1bb9c2f55d 100644 --- 
a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -82,7 +82,7 @@ bool MKLDNNEdge::needReorder() { childCanChangeMem = true; } - const auto& detectInPlaceChildsNum = [](const std::vector& edges) -> size_t { + const auto& detectInPlaceChildrenNum = [](const std::vector& edges) -> size_t { size_t count = 0; for (const auto& edge : edges) { auto childSPD = edge->getChild()->getSelectedPrimitiveDescriptor(); @@ -98,7 +98,7 @@ bool MKLDNNEdge::needReorder() { }; const auto portChildEdges = getParent()->getChildEdgesAtPort(inNumber); - if (in_place && childCanChangeMem && portChildEdges.size() > 1 && detectInPlaceChildsNum(portChildEdges) > 1) + if (in_place && childCanChangeMem && portChildEdges.size() > 1 && detectInPlaceChildrenNum(portChildEdges) > 1) canBeInPlaceConflicts = true; if (!canBeInPlaceConflicts && in_place && !getParent()->getChildEdges().empty()) { for (auto &p_edge_peer : portChildEdges) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 75db9a073e3..a80c788f5ec 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -1000,7 +1000,7 @@ void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) { } }; - auto childs = node->childEdges; + auto children = node->childEdges; auto parents = node->parentEdges; for (size_t i = 0; i < parents.size(); i++) { @@ -1009,10 +1009,10 @@ void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) { auto parent = p_edge->getParent(); if (!parent) continue; - for (size_t j = 0; j < childs.size(); j++) { - if (!childs[j].lock()) + for (size_t j = 0; j < children.size(); j++) { + if (!children[j].lock()) continue; - auto child = childs[j].lock()->getChild(); + auto child = children[j].lock()->getChild(); if (!child) continue; @@ -1023,7 +1023,7 @@ void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) { remEdge->drop(); removeEdge(*this, remEdge); } - remEdge = childs[j].lock(); + remEdge = children[j].lock(); int outNum = 0; if (remEdge) { outNum = remEdge->getOutputNum(); @@ -1048,7 +1048,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { } }; - auto childs = node->childEdges; + auto children = node->childEdges; auto parents = node->parentEdges; auto parentConvEdge = parents[0].lock(); @@ -1062,10 +1062,10 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { auto parent = p_edge->getParent(); if (!parent) continue; - for (size_t j = 0; j < childs.size(); j++) { - if (!childs[j].lock()) + for (size_t j = 0; j < children.size(); j++) { + if (!children[j].lock()) continue; - auto child = childs[j].lock()->getChild(); + auto child = children[j].lock()->getChild(); if (!child) continue; @@ -1076,7 +1076,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { remEdge->drop(); removeEdge(*this, remEdge); } - remEdge = childs[j].lock(); + remEdge = children[j].lock(); int outNum = 0; if (remEdge) { outNum = remEdge->getOutputNum(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 2d17b2b35df..d5f2e3819be 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -1310,7 +1310,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { graph.DropNode(childNode); } else if (childNode->getType() == Eltwise) { - auto childs = 
childNode->childEdges; + auto children = childNode->childEdges; auto parents = childNode->parentEdges; auto initialParentInNum = parentNode->getParentEdges().size(); @@ -1321,10 +1321,10 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { if (!parent) continue; if (parent == parentNode) { - for (size_t j = 0; j < childs.size(); j++) { - if (!childs[j].lock()) + for (size_t j = 0; j < children.size(); j++) { + if (!children[j].lock()) continue; - auto child = childs[j].lock()->getChild(); + auto child = children[j].lock()->getChild(); if (!child) continue; @@ -1335,7 +1335,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { remEdge->drop(); removeEdge(graph, remEdge); } - remEdge = childs[j].lock(); + remEdge = children[j].lock(); int outNum = 0; if (remEdge) { outNum = remEdge->getOutputNum(); diff --git a/inference-engine/src/plugin_api/xml_parse_utils.h b/inference-engine/src/plugin_api/xml_parse_utils.h index a4b9338db0f..911745cdf43 100644 --- a/inference-engine/src/plugin_api/xml_parse_utils.h +++ b/inference-engine/src/plugin_api/xml_parse_utils.h @@ -25,7 +25,7 @@ /** * @ingroup ie_dev_api_xml - * @brief Defines convinient for-each based cycle to iterate over node childs + * @brief Defines convinient for-each based cycle to iterate over node children * * @param c Child node name * @param p Parent node name diff --git a/inference-engine/src/vpu/common/include/vpu/utils/perf_report.hpp b/inference-engine/src/vpu/common/include/vpu/utils/perf_report.hpp index 4f4f552d619..db1cbb94580 100644 --- a/inference-engine/src/vpu/common/include/vpu/utils/perf_report.hpp +++ b/inference-engine/src/vpu/common/include/vpu/utils/perf_report.hpp @@ -37,7 +37,7 @@ struct StageMetaInfo final { std::vector inputPrecisions; std::vector inputDims; - size_t childsNum = 0; + size_t childrenNum = 0; int execOrder = -1; float execTime = 0; diff --git a/inference-engine/src/vpu/graph_transformer/src/backend/get_meta_data.cpp b/inference-engine/src/vpu/graph_transformer/src/backend/get_meta_data.cpp index bc4704c65bb..b853ac932b7 100644 --- a/inference-engine/src/vpu/graph_transformer/src/backend/get_meta_data.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/backend/get_meta_data.cpp @@ -137,7 +137,7 @@ void BackEnd::getMetaData( if (it != stageToMetaIndex.end()) { StageMetaInfo& meta = stagesMeta[it->second]; - stagesMeta[prIndex].childsNum++; + stagesMeta[prIndex].childrenNum++; meta.parentIndices.push_back(prIndex); meta.inputDims.push_back(dataMeta.desc.getDims()); meta.inputPrecisions.push_back(dataMeta.desc.getPrecision()); diff --git a/inference-engine/src/vpu/graph_transformer/src/frontend/custom_layer.cpp b/inference-engine/src/vpu/graph_transformer/src/frontend/custom_layer.cpp index 996c0e732be..577608c11a4 100644 --- a/inference-engine/src/vpu/graph_transformer/src/frontend/custom_layer.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/frontend/custom_layer.cpp @@ -37,8 +37,8 @@ namespace vpu { namespace { -void assertExactlyOneOccurrence(const pugi::xml_node &node, const SmallVector& childs) { - for (const auto &name : childs) { +void assertExactlyOneOccurrence(const pugi::xml_node &node, const SmallVector& children) { + for (const auto &name : children) { const auto& child = node.child(name.c_str()); VPU_THROW_UNLESS(!child.empty(), "Required parameter %s is not found", name); VPU_THROW_UNLESS(child.next_sibling(name.c_str()).empty(), @@ -46,8 +46,8 @@ void assertExactlyOneOccurrence(const pugi::xml_node &node, const SmallVector& childs) 
{ - for (const auto& name : childs) { +void assertOneOrMoreOccurrence(const pugi::xml_node &node, const SmallVector& children) { + for (const auto& name : children) { const auto& child = node.child(name.c_str()); VPU_THROW_UNLESS(!child.empty(), "Required parameter %s is not found", name); diff --git a/inference-engine/src/vpu/graph_transformer/src/utils/runtime_graph.cpp b/inference-engine/src/vpu/graph_transformer/src/utils/runtime_graph.cpp index 120cd88ae78..659067f6fd8 100644 --- a/inference-engine/src/vpu/graph_transformer/src/utils/runtime_graph.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/utils/runtime_graph.cpp @@ -80,7 +80,7 @@ InferenceEngine::CNNNetwork buildRuntimeGraph(GraphMetaInfo& graphMetaInfo, cons if (stageMeta.stageType == "Input") { params.emplace_back(std::make_shared()); node = params.back(); - } else if (stageMeta.childsNum == 0) { + } else if (stageMeta.childrenNum == 0) { results.emplace_back(std::make_shared(inputs.back())); node = results.back(); } else { diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_different_precision_on_childs.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_different_precision_on_children.cpp similarity index 95% rename from inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_different_precision_on_childs.cpp rename to inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_different_precision_on_children.cpp index 9998f99b156..dc2567fb70d 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_different_precision_on_childs.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_different_precision_on_children.cpp @@ -77,14 +77,14 @@ typedef std::tuple < ConcatTransformationTestValues > ConcatTransformationParams; -class ConcatWithDifferentChildsTransformation : public LayerTransformation, public testing::WithParamInterface { +class ConcatWithDifferentChildrenTransformation : public LayerTransformation, public testing::WithParamInterface { public: void SetUp() override { const ngraph::element::Type precision = std::get<0>(GetParam()); const ngraph::Shape inputShape = std::get<1>(GetParam()); ConcatTransformationTestValues testValues = std::get<2>(GetParam()); - actualFunction = ngraph::builder::subgraph::ConcatFunction::getOriginalWithDifferentPrecisionOnChilds( + actualFunction = ngraph::builder::subgraph::ConcatFunction::getOriginalWithDifferentPrecisionOnChildren( precision, inputShape, testValues.actual.fakeQuantize1, @@ -100,7 +100,7 @@ public: transform.add(testValues.params); transform.transform(actualFunction); - referenceFunction = ngraph::builder::subgraph::ConcatFunction::getReferenceWithDifferentPrecisionOnChilds( + referenceFunction = ngraph::builder::subgraph::ConcatFunction::getReferenceWithDifferentPrecisionOnChildren( precision, inputShape, testValues.multiChannels, @@ -128,7 +128,7 @@ public: } }; -TEST_P(ConcatWithDifferentChildsTransformation, CompareFunctions) { +TEST_P(ConcatWithDifferentChildrenTransformation, CompareFunctions) { actualFunction->validate_nodes_and_infer_types(); auto res = compare_functions(referenceFunction, actualFunction, true, true, true); ASSERT_TRUE(res.first) << res.second; @@ -239,10 +239,10 @@ const std::vector testValues = { INSTANTIATE_TEST_CASE_P( smoke_LPT, - ConcatWithDifferentChildsTransformation, + ConcatWithDifferentChildrenTransformation, 
::testing::Combine( ::testing::ValuesIn(precisions), ::testing::ValuesIn(shapes), ::testing::ValuesIn(testValues)), - ConcatWithDifferentChildsTransformation::getTestCaseName); + ConcatWithDifferentChildrenTransformation::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_childs.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_children.cpp similarity index 87% rename from inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_childs.cpp rename to inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_children.cpp index 05361b264b3..fe0cd0b5465 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_childs.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_children.cpp @@ -4,7 +4,7 @@ #include -#include "low_precision_transformations/concat_with_different_precision_on_childs.hpp" +#include "low_precision_transformations/concat_with_different_precision_on_children.hpp" #include "common_test_utils/test_constants.hpp" using namespace LayerTestsDefinitions; @@ -20,7 +20,7 @@ const std::vector tras LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8() }; -const std::vector testValues = { +const std::vector testValues = { // U8 { { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, @@ -45,7 +45,7 @@ const std::vector testValues = { const std::vector multiChannel = { true/*, false*/ }; -INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithDifferentChildsTransformation, +INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithDifferentChildrenTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(ngraph::Shape({ 1, 6, 10, 10 })), @@ -53,5 +53,5 @@ INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithDifferentChildsTransformation, ::testing::ValuesIn(testValues), ::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(multiChannel)), - ConcatWithDifferentChildsTransformation::getTestCaseName); + ConcatWithDifferentChildrenTransformation::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_childs.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_children.cpp similarity index 87% rename from inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_childs.cpp rename to inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_children.cpp index 892373d5649..0b283483716 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_childs.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_children.cpp @@ -4,7 +4,7 @@ #include -#include 
"low_precision_transformations/concat_with_different_precision_on_childs.hpp" +#include "low_precision_transformations/concat_with_different_precision_on_children.hpp" #include "common_test_utils/test_constants.hpp" using namespace LayerTestsDefinitions; @@ -20,7 +20,7 @@ const std::vector tras LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8() }; -const std::vector testValues = { +const std::vector testValues = { // U8 { { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }, @@ -45,7 +45,7 @@ const std::vector testValues = { const std::vector multiChannel = { true/*, false*/ }; -INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithDifferentChildsTransformation, +INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithDifferentChildrenTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(ngraph::Shape({ 1, 3, 10, 10 })), @@ -53,5 +53,5 @@ INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithDifferentChildsTransformation, ::testing::ValuesIn(testValues), ::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(multiChannel)), - ConcatWithDifferentChildsTransformation::getTestCaseName); + ConcatWithDifferentChildrenTransformation::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_different_precision_on_childs.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_different_precision_on_children.hpp similarity index 74% rename from inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_different_precision_on_childs.hpp rename to inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_different_precision_on_children.hpp index 509b673aade..50ea25c73d2 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_different_precision_on_childs.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/concat_with_different_precision_on_children.hpp @@ -11,7 +11,7 @@ #include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" namespace LayerTestsDefinitions { -class ConcatWithDifferentChildsTransformationParam { +class ConcatWithDifferentChildrenTransformationParam { public: ngraph::builder::subgraph::FakeQuantizeOnData fqOnData1; ngraph::builder::subgraph::FakeQuantizeOnData fqOnData2; @@ -21,16 +21,16 @@ typedef std::tuple< ngraph::element::Type, ngraph::Shape, std::string, // target device: CPU, GPU - ConcatWithDifferentChildsTransformationParam, + ConcatWithDifferentChildrenTransformationParam, ngraph::pass::low_precision::LayerTransformation::Params, // transformation parameters // multichannel - bool> ConcatWithDifferentChildsTransformationParams; + bool> ConcatWithDifferentChildrenTransformationParams; -class ConcatWithDifferentChildsTransformation : - public testing::WithParamInterface, +class ConcatWithDifferentChildrenTransformation : + public testing::WithParamInterface, public LayerTestsUtils::LayerTransformation { public: - static std::string getTestCaseName(testing::TestParamInfo obj); + static std::string getTestCaseName(testing::TestParamInfo obj); InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override; protected: diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_different_precision_on_childs.cpp 
b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_different_precision_on_children.cpp similarity index 78% rename from inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_different_precision_on_childs.cpp rename to inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_different_precision_on_children.cpp index a7da85ed9b6..7688d4e7a8b 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_different_precision_on_childs.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/concat_with_different_precision_on_children.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "low_precision_transformations/concat_with_different_precision_on_childs.hpp" +#include "low_precision_transformations/concat_with_different_precision_on_children.hpp" #include #include @@ -19,11 +19,11 @@ using namespace InferenceEngine::details; namespace LayerTestsDefinitions { -std::string ConcatWithDifferentChildsTransformation::getTestCaseName(testing::TestParamInfo obj) { +std::string ConcatWithDifferentChildrenTransformation::getTestCaseName(testing::TestParamInfo obj) { ngraph::element::Type netPrecision; ngraph::Shape inputShapes; std::string targetDevice; - ConcatWithDifferentChildsTransformationParam param; + ConcatWithDifferentChildrenTransformationParam param; ngraph::pass::low_precision::LayerTransformation::Params params; bool multiChannel; std::tie(netPrecision, inputShapes, targetDevice, param, params, multiChannel) = obj.param; @@ -36,11 +36,11 @@ std::string ConcatWithDifferentChildsTransformation::getTestCaseName(testing::Te return result.str(); } -InferenceEngine::Blob::Ptr ConcatWithDifferentChildsTransformation::GenerateInput(const InferenceEngine::InputInfo &info) const { +InferenceEngine::Blob::Ptr ConcatWithDifferentChildrenTransformation::GenerateInput(const InferenceEngine::InputInfo &info) const { ngraph::element::Type netPrecision; ngraph::Shape inputShapes; std::string targetDevice; - ConcatWithDifferentChildsTransformationParam param; + ConcatWithDifferentChildrenTransformationParam param; ngraph::pass::low_precision::LayerTransformation::Params params; bool multiChannel; std::tie(netPrecision, inputShapes, targetDevice, param, params, multiChannel) = this->GetParam(); @@ -49,25 +49,25 @@ InferenceEngine::Blob::Ptr ConcatWithDifferentChildsTransformation::GenerateInpu return LayerTransformation::GenerateInput(params.precisionsOnActivations[0], info.getTensorDesc(), k); } -void ConcatWithDifferentChildsTransformation::SetUp() { +void ConcatWithDifferentChildrenTransformation::SetUp() { ngraph::element::Type netPrecision; ngraph::Shape inputShapes; - ConcatWithDifferentChildsTransformationParam param; + ConcatWithDifferentChildrenTransformationParam param; ngraph::pass::low_precision::LayerTransformation::Params params; bool multiChannel; std::tie(netPrecision, inputShapes, targetDevice, param, params, multiChannel) = this->GetParam(); - function = ngraph::builder::subgraph::ConcatFunction::getOriginalWithDifferentPrecisionOnChilds( + function = ngraph::builder::subgraph::ConcatFunction::getOriginalWithDifferentPrecisionOnChildren( netPrecision, inputShapes, param.fqOnData1, param.fqOnData2); validate(); } -void ConcatWithDifferentChildsTransformation::validate() { +void ConcatWithDifferentChildrenTransformation::validate() { ngraph::element::Type netPrecision; 
ngraph::Shape inputShapes; std::string targetDevice; - ConcatWithDifferentChildsTransformationParam param; + ConcatWithDifferentChildrenTransformationParam param; ngraph::pass::low_precision::LayerTransformation::Params params; bool multiChannel; std::tie(netPrecision, inputShapes, targetDevice, param, params, multiChannel) = this->GetParam(); @@ -83,7 +83,7 @@ void ConcatWithDifferentChildsTransformation::validate() { } } -TEST_P(ConcatWithDifferentChildsTransformation, CompareWithRefImpl) { +TEST_P(ConcatWithDifferentChildrenTransformation, CompareWithRefImpl) { Run(); }; diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp index 7fe001835db..4d0c7c249e7 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp @@ -71,7 +71,7 @@ public: const bool ssBeforeConcat, const bool ssAfterConcat); - static std::shared_ptr getOriginalWithDifferentPrecisionOnChilds( + static std::shared_ptr getOriginalWithDifferentPrecisionOnChildren( const ngraph::element::Type precision, const ngraph::Shape& inputShape, const FakeQuantizeOnData& fqOnData1, @@ -182,7 +182,7 @@ public: const DequantizationOperations& deqAfter1, const DequantizationOperations& deqAfter2); - static std::shared_ptr getReferenceWithDifferentPrecisionOnChilds( + static std::shared_ptr getReferenceWithDifferentPrecisionOnChildren( const ngraph::element::Type precision, const ngraph::Shape& inputShape, const bool multiChannel, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp index 063825b1872..64357d96aeb 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp @@ -490,12 +490,12 @@ std::shared_ptr ConcatFunction::getOriginalWithStridedSlice( std::shared_ptr function = std::make_shared( results, ngraph::ParameterVector{ input }, - "ConcatWithDifferentChildsTransformation"); + "ConcatWithDifferentChildrenTransformation"); return function; } -std::shared_ptr ConcatFunction::getOriginalWithDifferentPrecisionOnChilds( +std::shared_ptr ConcatFunction::getOriginalWithDifferentPrecisionOnChildren( const ngraph::element::Type precision, const ngraph::Shape& inputShape, const FakeQuantizeOnData& fqOnData1, @@ -550,7 +550,7 @@ std::shared_ptr ConcatFunction::getOriginalWithDifferentPrecis std::shared_ptr function = std::make_shared( results, ngraph::ParameterVector{ input1, input2 }, - "ConcatWithDifferentChildsTransformation"); + "ConcatWithDifferentChildrenTransformation"); return function; } @@ -1259,12 +1259,12 @@ std::shared_ptr ConcatFunction::getReferenceWithStridedSlice( std::shared_ptr function = std::make_shared( results, ngraph::ParameterVector{ input }, - "ConcatWithDifferentChildsTransformation"); + "ConcatWithDifferentChildrenTransformation"); return function; } -std::shared_ptr ConcatFunction::getReferenceWithDifferentPrecisionOnChilds( +std::shared_ptr ConcatFunction::getReferenceWithDifferentPrecisionOnChildren( const ngraph::element::Type precision, const ngraph::Shape& inputShape, const bool multiChannel, @@ -1340,7 +1340,7 
@@ std::shared_ptr ConcatFunction::getReferenceWithDifferentPreci std::shared_ptr function = std::make_shared( results, ngraph::ParameterVector{ input1, input2 }, - "ConcatWithDifferentChildsTransformation"); + "ConcatWithDifferentChildrenTransformation"); return function; } From a8b5f1f4a3b2b97b8d3f25463dd47605d5598fbb Mon Sep 17 00:00:00 2001 From: Artemy Skrebkov Date: Thu, 6 May 2021 20:57:59 +0300 Subject: [PATCH 71/73] Update opencv package for yocto (#5536) --- inference-engine/cmake/dependencies.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake index dfca56ba5c1..a489d51904d 100644 --- a/inference-engine/cmake/dependencies.cmake +++ b/inference-engine/cmake/dependencies.cmake @@ -199,7 +199,7 @@ if (ENABLE_OPENCV) set(OPENCV_VERSION "4.5.2") set(OPENCV_BUILD "076") - set(OPENCV_BUILD_YOCTO "708") + set(OPENCV_BUILD_YOCTO "772") if (AARCH64) if(DEFINED ENV{THIRDPARTY_SERVER_PATH}) @@ -219,7 +219,7 @@ if (ENABLE_OPENCV) TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}/opencv" ENVIRONMENT "OpenCV_DIR" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*" - SHA256 "ee3e5255f381b8de5e6fffe4e43dae8c99035377d0380f9183bd7341f1d0f204") + SHA256 "23c250796ad5fc9db810e1680ccdb32c45dc0e50cace5e0f02b30faf652fe343") unset(IE_PATH_TO_DEPS) endif() From a8289b58c46af6487535cd4a6e443882b7e6843a Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 6 May 2021 21:21:23 +0300 Subject: [PATCH 72/73] Fixed TBBBind_2.4 usage for RelWithDebInfo (#5535) --- inference-engine/src/inference_engine/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 5fcce933c30..66a43ff315d 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -8,6 +8,11 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") find_package(TBBBIND_2_4 QUIET) if (TBBBIND_2_4_FOUND) message(STATUS "Static tbbbind_2_4 package was found") + + # WA: need to update TBBBind_2_4 package + set_target_properties(TBBbind::tbbbind_2_4 PROPERTIES + MAP_IMPORTED_CONFIG_MINSIZEREL Release + MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release) endif() endif() From 8645c08396942d7ad811f2d46dce6435dff3be53 Mon Sep 17 00:00:00 2001 From: Vladimir Zinoviev Date: Thu, 6 May 2021 21:48:36 +0300 Subject: [PATCH 73/73] [LPT] Zero point insertion in case of zero value on FQ output high (#5467) * [LPT] Zero point insertion in case of zero value on FQ output high * [LPT] Change precision in test on the real default precision[0] --- .../src/layer_transformation.cpp | 22 ++++---- .../fake_quantize_transformation.cpp | 6 ++- .../fake_quantize_transformation.cpp | 52 ++++++++++--------- 3 files changed, 45 insertions(+), 35 deletions(-) diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp index dbbb8206b4d..834aa6931c5 100644 --- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp @@ -221,18 +221,20 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c bool hasZeroPoint = false; for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) { const bool signedInterval = 
std::signbit(quantizationDetails.outputLowValues[i]) != std::signbit(quantizationDetails.outputHighValues[i]); - const bool boundaryValuesAreNotZero = - (std::fabs(quantizationDetails.outputLowValues[i]) >= zeroThreshold) && - (std::fabs(quantizationDetails.outputHighValues[i]) >= zeroThreshold); - if (signedInterval && boundaryValuesAreNotZero) { + const bool outputLowValueIsNotZero = std::fabs(quantizationDetails.outputLowValues[i]) >= zeroThreshold; + if (signedInterval && outputLowValueIsNotZero) { // signed unsignedPrecision = false; hasNegative = true; - const float expectedRatio = quantizationDetails.levels == 256 ? asymmetricIntervalSideRatio256 : -1.f; - const float actualRatio = quantizationDetails.outputLowValues[i] / quantizationDetails.outputHighValues[i]; - const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio)); - if (actual > quantizationIntervalAsymmetryThreshold) { + if (quantizationDetails.outputHighValues[i] != 0.f) { + const float expectedRatio = quantizationDetails.levels == 256 ? asymmetricIntervalSideRatio256 : -1.f; + const float actualRatio = quantizationDetails.outputLowValues[i] / quantizationDetails.outputHighValues[i]; + const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio)); + if (actual > quantizationIntervalAsymmetryThreshold) { + hasZeroPoint = true; + } + } else { hasZeroPoint = true; } #ifdef LPT_PRINT_DEQUANTIZATION_INFO @@ -244,8 +246,8 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c } else { // unsigned signedPrecision = false; - if (boundaryValuesAreNotZero) { - hasZeroPoint = boundaryValuesAreNotZero; + if (outputLowValueIsNotZero) { + hasZeroPoint = outputLowValueIsNotZero; } #ifdef LPT_PRINT_DEQUANTIZATION_INFO diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp index 2f856a61cd7..0208a6b0e72 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp @@ -42,6 +42,10 @@ const std::vector fakeQuantizeOnDataValues = { { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } }, "Pooling", "U8" }, + { + { 256ul, {}, { -127.5f }, { 0.f }, { -127.5f }, { 0.f } }, + "Pooling", "U8" + }, { { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } }, "Pooling", "FP32" @@ -50,7 +54,7 @@ const std::vector fakeQuantizeOnDataValues = { { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } }, "Pooling", "FP32" }, - // nGraph: I8->FP32 Convert is not supported + // nGraph: I8->FP32 Convert is not supported // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } }, // { 256ul, { 1ul }, { -1.28f} , { 1.27f } } }; diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp index 35f047794da..62fb144c785 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp +++ 
b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp @@ -26,30 +26,34 @@ const std::vector trasformationParamValues = { }; const std::vector fakeQuantizeOnDataValues = { - { - {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, - "Pooling", "U8" - }, - { - { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, - "Pooling", "U8" - }, - { - { 256ul, {}, { 0.f }, { 2.55f }, { -1.28f }, { 1.27f } }, - "Pooling", "I8" - }, - { - { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } }, - "Pooling", "U8" - }, - { - { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } }, - "Pooling", "FP32" - }, - { - { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } }, - "Pooling", "FP32" - }, + { + {256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, + "Pooling", "U8" + }, + { + { 256ul, { 1ul }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } }, + "Pooling", "U8" + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { -1.28f }, { 1.27f } }, + "Pooling", "I8" + }, + { + { 256ul, {}, { 0.f }, { 2.55f }, { 2.55f }, { 2.55f } }, + "Pooling", "U8" + }, + { + { 256ul, {}, { -127.5f }, { 0.f }, { -127.5f }, { 0.f } }, + "Pooling", "U8" + }, + { + { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } }, + "Pooling", "FP32" + }, + { + { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } }, + "Pooling", "FP32" + }, // nGraph: I8->FP32 Convert is not supported // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } }, // { 256ul, { 1ul }, { -1.28f} , { 1.27f } }
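[Editor's note] For readers of the final patch ("[LPT] Zero point insertion in case of zero value on FQ output high"), the following is a minimal standalone sketch of the per-channel rule that layer_transformation.cpp now applies when deciding whether a FakeQuantize output interval needs a zero point. It is not the OpenVINO LayerTransformation API: the function name needsZeroPoint and the threshold constants are illustrative assumptions chosen only to show the arithmetic; the real code also tracks the resulting precision (U8/I8) per channel.

// Sketch only: mirrors the zero-point decision added in this patch series.
// Threshold values below are assumed for illustration, not the library's constants.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>

bool needsZeroPoint(float outLow, float outHigh, std::size_t levels = 256,
                    float zeroThreshold = 1e-6f, float asymmetryThreshold = 2e-4f) {
    const bool signedInterval = std::signbit(outLow) != std::signbit(outHigh);
    const bool lowIsNotZero = std::fabs(outLow) >= zeroThreshold;
    if (signedInterval && lowIsNotZero) {
        // New behaviour: a zero high bound (e.g. [-127.5, 0]) has no defined
        // low/high ratio, so a symmetric signed mapping is impossible and a
        // zero point is always required.
        if (outHigh == 0.f) {
            return true;
        }
        // Otherwise compare the interval's asymmetry against the ideal
        // signed-int8 ratio -128/127; a large deviation forces a zero point.
        const float expectedRatio = (levels == 256) ? -128.f / 127.f : -1.f;
        const float actualRatio = outLow / outHigh;
        const float deviation =
            std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
        return deviation > asymmetryThreshold;
    }
    // Unsigned interval: a non-zero low bound shifts it away from zero.
    return lowIsNotZero;
}

int main() {
    std::cout << needsZeroPoint(-127.5f, 0.f) << '\n';   // 1: zero point needed (the new test case, kept as U8)
    std::cout << needsZeroPoint(-1.28f, 1.27f) << '\n';  // 0: symmetric interval, plain I8
    std::cout << needsZeroPoint(0.f, 2.55f) << '\n';     // 0: plain U8 interval
    return 0;
}

This is why the new { -127.5f, 0.f } entries in the CPU and GPU fake_quantize_transformation tests above expect "U8": the interval is signed but cannot be represented symmetrically, so quantization proceeds with a zero point instead of falling back to FP32.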